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synw (deactivating , n) . 

synw (deactivating , ving) . 

synw (deactivation, n) . 

synw (death, n) . 

synw(demethylate, v).

synw (demethylate, vp) . 

synw (demethylated, ved) . 

synw (demethylated , ven) . 

synw (demethylates , vp) . 

synw (demethylating, n) . 

synw(demethylating,ving) . 

synw(demethylation, n).

synw (dephosphorylate , v) . 

synw(dephosphorylate, vp).

synw (dephosphorylated, ved) . 

synw (dephosphorylated, ven) . 

synw (dephosphorylates , vp) . 

synw (dephosphorylating, n) . 

synw(dephosphorylating, ving).

synw(dephosphorylation, n).

synw (die, v) . 

synw ( die, vp) . 

synw (died, ved) . 

synw (died, ven) . 

synw (dies , vp) . 

synw (disassemble , v) . 

synw (disassemble , vp) . 

synw (disassembled, ved). 

synw (disassembled, ven). 

synw (disassembles , vp) . 

synw (disassembling, n) . 

synw (disassembling, ving). 

synw (disassembly , n) . 

synw (discharge , n) . 

synw (discharge , v) . 

synw (discharge , vp) . 

synw (discharged, ved) . 

synw (discharged, ven) . 

synw (discharges, vp) . 

synw (discharging , n) . 

synw (discharging, ving) . 

synw (disengage, v) . 

synw (disengage, vp) . 
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synw (constrain, vp) . 
synw (constrained, ved) . 
synw ( constrained, ven) . 
synw (constraining, n) . 
synw (constraining, ving) . 
synw (constrains, vp) . 
synw (constraint, n) . 
synw (coprecipitate , v) . 
synw(coprecipitate,vp) . 
synw (coprecipitated, ved) . 
synw (coprecipitated, ven) . 
synw(coprecipitates, vp) . 
synw (coprecipitating , n) . 
synw (coprecipitating, ving) . 
synw(coprecipitation ,n) . 
synw(copurification, n).
synw(copurified, ved).
synw(copurified, ven).
synw(copurifies, vp).
synw(copurify, vp).
synw(copurify, v).
synw(copurifying ,n). 
synw(copurifying ,ving) . 
synw (couple ,vp) . 
synw (couple, v) . 
synw (coupled, ved) . 
synw (coupled, ven) . 
synw (couples , vp) . 
synw (coupling, n) . 
synw(coupling, ving).
synw ( cut, n) . 
synw ( cut, v) . 
synw (cut , ved) . 
synw(cut , ven) . 
synw ( cut, vp) . 
synw(cuts, vp).
synw (cutting, n) . 
synw (cutting, ving) . 
synw(deactivate, v) . 
synw (deactivate ,vp) . 
synw (deactivated, ved) . 
synw (deactivated, ven) . 
synw(deactivates, vp) . 
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synw (causing , n) . 

synw (causing, ving) . 

synw(cleavage, n).

synw(cleave, v).

synw(cleave, vp).

synw(cleaved, ved) . 

synw (cleaved, ven) . 

synw(cleaves,vp) . 

synw (cleaving, n) . 

synw (cleaving, ving) . 

synw (coimmunoprecipitate ,v). 

synw(coimmunoprecipitate, vp) . 

synw (coimmunoprecipitated ,ved). 

synw(coimmunoprecipitated ,ven) . 

synw(coimmunoprecipitates, vp).

synw (coimmunoprecipitating ,n). 

synw(coimmunoprecipitating ,ving) . 

synw(coimmunoprecipitation, n).

synw(combination, n).

synw (combine ,v) . 

synw (combine ,vp) . 

synw (combined ,ved) . 

synw (combined ,ven) . 

synw (combines , vp) . 

synw (combining ,n) . 

synw (combining ,ving) . 

synw (conjugate ,v) . 

synw (conjugate ,vp) . 

synw (conjugated ,ve) . 

synw (conjugated ,ved) . 

synw (conjugates, vp) . 

synw (conjugating ,n) . 

synw (conjugating ,ving) . 

synw (conjugation ,n) . 

synw (connect ,vp) . 

synw (connect ,v) . 

synw (connected ,ve). 

synw (connected ,ved) . 

synw (connecting ,n) . 

synw (connecting ,ving) . 

synw (connection , n) . 

synw (connects , vp) . 

synw (constrain, v) . 
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synw (attached ,ven) . 

synw (attaches, vp) . 

synw (attaching ,n) . 

synw (attaching ,ving) . 

synw (attachment , n) . 

synw (bind, v) . 

synw (bind, vp) . 

synw(binding, n).

synw (binding, ving) . 

synw (binds , vp) . 

synw (block, v) . 

synw (block, vp) . 

synw (blockage , n) . 

synw (blocked, ved) . 

synw (blocked, ven) . 

synw (blocking, n) . 

synw (blocking , ving ) . 

synw (blocks, vp) . 

synw (bound, ved) . 

synw ( bound , ven ) . 

synw (break, v) . 

synw ( break, vp) . 

synw (breakage, n) . 

synw (breaking, n) . 
synw (breaking, ving) . 
synw ( breaks, vp) . 
synw (broke, ved) . 
synw (broken, ven) . 
synw (catalyzation, n) . 
synw (catalyze, v) . 
synw(catalyze, vp).
synw(catalyzed, ved).
synw(catalyzed, ven).
synw(catalyzes, vp).
synw (catalyzing, n) . 
synw (catalyzing, ving) . 
synw (causation, n) . 
synw ( cause, n) . 
synw (cause, v) . 
synw (cause , ven) . 
synw (cause, vp) . 
synw (caused, ved) . 
synw ( causes, vp) . 
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synw(activate , vp) . 
synw (activated, ved) . 
synw (activated, ven) . 
synw(activates , vp) . 
synw(activating, n).
synw(activating, ving).
synw(activation, n).
synw (add, v) . 
synw (add, vp) . 
synw (added, ved) . 
synw ( added , ven) . 
synw ( adding, n) . 
synw (adding, ving) . 
synw (addition, n) . 
synw (adds, vp) . 
synw(after, prep).
synw (aggregate ,v) . 
synw (aggregate ,vp). 
synw(aggregated, ved).
synw(aggregated, ven).
synw(aggregates, vp).
synw(aggregating, n).
synw(aggregating, ving).
synw(aggregation, n).
synw(arrest, n).
synw(arrest, v).
synw(arrest, vp).
synw(arrested, ved).
synw(arrested, ven).
synw(arresting, n).
synw(arresting, ving).
synw (arrests, vp) . 
synw (associate, v) . 
synw ( associate , vp) . 
synw(associated, ved).
synw (associated, ven) . 
synw (associates, vp) . 
synw (associating, n) . 
synw (associating, ving) . 
synw (association, n) . 
synw (attach , v) . 
synw (attach, vp) . 
synw (attached ,ved) . 



PCT/USOO/10302 



Page 6 



BNSDOCID <WO__0083e87Al_U> 



wo 00/63687 



PCT/USOO/10302 



synp(set, [set, free], vp).
synp(sets, [sets, free], vp).
synp(sets, [sets, free], vp).
synp(setting, [setting, free], n).
synp(setting, [setting, free], n).
synp(setting, [setting, free], ving).
synp(setting, [setting, free], ving).
synp(suppress, [suppress, activity, of], v).
synp(suppress, [suppress, activity, of], vp).
synp(suppressed, [suppressed, activity, of], ved).
synp(suppressed, [suppressed, activity, of], ven).
synp(suppresses, [suppresses, activity, of], vp).
synp(suppressing, [suppressing, activity, of], n).
synp(suppressing, [suppressing, activity, of], ving).
synp(suppression, [suppression, of, activity, of], n).
synp(switch, [switch, on, the, activity, of], vp).



switched, 
switched, 
switched, 
switched, 
switched, 
switches , 
up, [up, 
up, [up, 
up, [up, 
up, (up, 
up, [up, 
up, [up, 
up, [up, 
up, [up, 
was, (was 
was, (was 



switched, on, the, activity, of],ved). 

switched, on, the, activity, of] , ved) . 

switched, on, the, activity, of], ved). 

switched, on, the, activity, of] ,ved) . 

switched, on, the, activity, of] ,ved) . 

switches, on, the, activity, of] ,vp) . 

, regulate] ,v) , % A up-regulates B B 



A up-regulates B B 

A up-regulates B B A 

A up-regulates B B A 
% A up-regulates B B 



, regulate] , vp) . % 
, regulated] , ved) . 
, regulated] ,ven) . % 
, regulates] , vp) . 
, regulating] ,n) . % 
, regulating] ,ving) . 
, regulation] ,n) . 
a, means, of , producing] ,ved) . 
due, to] , ved) . 
were, [were , a , means, of , producing] , ved) . % ? 
were, [were, due, to] , ved) . 
synw(acetylate, v).
synw(acetylate, vp).
synw(acetylated, ved).
synw(acetylated, ven).
synw(acetylates, vp).
synw(acetylating, n).
synw(acetylating, ving).
synw(acetylation, n).
synw(activate, v).



"> A 
A 
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n, 
n, 
n, 
n, 
o, 
o, 
o, 
o, 



n, 



o, [o, 



o, 
only, 
prolyl , 
prolyl / 
prolyl , 
prolyl , 
prolyl , 
prolyl . 
prolyl , 
prolyl, 
result , 
result , 



- ' , glycosylates] , vp) . 

- ' , glycosylating] ,n) . 

- • , glycosylating] , ving) . 
- ' ,glycosylation] , n) . 
- ' , terminal , proteolysis] , n) 
glycosylate] , v) . 

- ' , glycosylate] , vp) . 

- ' , glycosylated! , ved) . 

- • , glycosylated] , ven) . 

- ' , glycosylates] ,vp) . 

- ' , glycosylating] , n) . 
- ' , glycosylating] ,ving) . 

- * ,glycosylation] ,n) . 
[only, after] ,prep) . 



[prolyl, ' - ' , 4 , 
[prolyl , ' - ' , 4 , 
[prolyl , ' - ' , 4 , 
[prolyl , ' - • , 4 , 
[prolyl, ' - ' ,4, 
[prolyl, ' - ' ,4, 
[prolyl, ' - ' , 4 , 
[prolyl / • - ' , 4 , 
[result , from] ,v) . 
[result , from] ,vp) 
result, [result, in] ,v) . 
result , [result , in] ,vp) , 
resulted, [resulted, from] 
synp(resulted, [resulted, from], ven).
synp(resulted, [resulted, in], ved).
synp(resulted, [resulted, in], ven).
synp(resulting, [resulting, from], n).
synp(resulting, [resulting, from], ving).
synp(resulting, [resulting, in], n).
synp(resulting, [resulting, in], ving).
synp(results, [results, from], vp).
synp(results, [results, in], vp).
set, [set, free3,v). 
[se.t, free] ,v) . 
[set , free] , ved) . 
[set , free] , ved) . 
[set, free] ,ven) . 
[set, free], ven). 



, hydroxy late] ,v ) . 
, hydroxylate] ,vp) . 
, hydroxy la ted] , ved ) . 
, hydroxylated] ,ven ) , 
, hydroxylates] ,vp) . 
, hydroxy 1 at ing] ,n ) . 
, hydroxylafcing] , ving ) 
, hydroxy la t ion] ,n) . 



, ved) 



set 
set 
set 
set 
set 
set 



[set, free] , vp) 
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having, [having, an, active , role, in] ,n) . 
synp(having, [having, an, active, role, in], ving).
synp(is, [is, a, means, of, producing], vp).
synp(is, [is, due, to], vp).

functions, [functions , as , a , negative, regulator , of ] ,vp) . 

function, [function, as, a, negative, regulator, of] , vp) . 

lead, [lead, to] , v) . 

leads , [leads , to] , vp) . 

leading, [leading, to] ,n) . 

synp(leading, [leading, to], ving).

synp(leads, [leads, to], vp).

synp(led, [led, to], ved).

synp(led, [led, to], ven).

may, [may , be , responsible, for] , vp) . 

mediate, [mediate, a, signal] , v) . %A mediates a signal to 

mediate, [mediate, a, signal] , vp) . 
synp(mediated, [mediated, a, signal], ved).
synp(mediated, [mediated, a, signal], ven).
synp(mediates, [mediates, a, signal], vp).
synp(mediating, [mediating, a, signal], n).
synp(mediating, [mediating, a, signal], ving).

synp(mediation, [mediation, of, a, signal], n).

synp(n, [n, '-', acetylate], v).

synp(n, [n, '-', acetylate], vp).

synp(n, [n, '-', acetylated], ved).

synp(n, [n, '-', acetylated], ven).

synp(n, [n, '-', acetylates], vp).

synp(n, [n, '-', acetylating], n).

synp(n, [n, '-', acetylating], ving).

synp(n, [n, '-', acetylation], n).

synp(n, [n, '-', acylate], v).

synp(n, [n, '-', acylate], vp).

synp(n, [n, '-', acylated], ved).

synp(n, [n, '-', acylated], ven).

synp(n, [n, '-', acylates], vp).

synp(n, [n, '-', acylating], n).

synp(n, [n, '-', acylating], ving).

synp(n, [n, '-', acylation], n).

synp(n, [n, '-', glycosylate], v).

synp(n, [n, '-', glycosylate], vp).

synp(n, [n, '-', glycosylated], ved).

synp(n, [n, '-', glycosylated], ven).
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being, [being, due, to] , n) . 
synp(being, [being, due, to], ving).
synp(caused, [caused, by], ved).
synp(caused, [caused, by], ven).
synp(convey, [convey, a, signal], v).
synp(convey, [convey, a, signal], vp).
synp(conveyed, [conveyed, a, signal], ved).
synp(conveyed, [conveyed, a, signal], ven).
synp(conveying, [conveying, a, signal], ving).
synp(conveying, [conveying, a, signal], n).
synp(conveys, [conveys, a, signal], vp).
synp(dissociate, [dissociate, from], vp).
synp(dissociate, [dissociate, from], v).
synp(dissociated, [dissociated, from], ved).
synp(dissociated, [dissociated, from], ven).
synp(dissociates, [dissociates, from], vp).
synp(dissociating, [dissociating, from], n).
synp(dissociating, [dissociating, from], ving).
synp(dissociation, [dissociation, from], n).



synp(down, [down, '-', regulate], v).
synp(down, [down, '-', regulate], vp). % A down-regulates B
synp(down, [down, '-', regulated], ved).
synp(down, [down, '-', regulated], ven).
synp(down, [down, '-', regulates], vp).
synp(down, [down, '-', regulating], n).
synp(down, [down, '-', regulating], ving).
synp(down, [down, '-', regulation], n).
synp(due, [due, to, the, fact, that], adj).
synp(due, [due, to], adj). % ?
synp(form, [form, complex], v).
synp(form, [form, complex], vp).
synp(formation, [formation, of, complex], n).
synp(formed, [formed, complex], ved).
synp(formed, [formed, complex], ven).
forming, [forming, complex] ,n) . 
forming, [forming, complex] ,ving) . 
forms, [forms, complex] ,vp) . 
had, [had, an, active , role , in] , ved) . 
had, [had, an, active, role, in] ,ven) . 
has, [has , an, active, role , in] , vp) . 
have, [have, an, active , role, in] ,v) . 
have, [have, an, active , role , in] ,vp) . 
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% lexsyn.pat 

% revised March 17, 2000 

% SYNTACTIC LEXICON FOR ACTIONS 

% Contains syntactic entries for action type words and phrases 

% 

% synp(+Word1, +Wordlist, +Syn)

% synp: Word1 is first word of phrase, Wordlist is list of words in phrase

% synp: Syn is syntactic category
% 

% synw (+ Word, + Syn) is same as synp except there is no wordlist 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%.%%%%%%%%%%%%%%%%%%%%%%%%%%%% 

synp(account, [account, for], v).

synp (account , [account , for] ,vp) . 

synp (accounted, [accounted, for] ,ved) . 

synp (accounted, [accounted, for] ,ven) . 

synp (accounting, [accounting, for] ,ving) . 

synp (accounting, [accounting, for] ,n) . 

synp (accounts , [accounts , for] ,vp) . 

synp (add, [add, up] ,vp) . 

synp (add, [add, up] ,v) . 

synp(added, [added, up], ved).

synp (added, [added, up] ,ven) . 

synp (adding, [adding, up] ,n) . 

synp (adding, [adding, up] ,ving) . 

synp (adds, [adds, up] ,vp) . 

synp(am, [am, a, means, of , producing] ,vp) . 

synp (am, [am, due , to] , vp) . 

synp(are, [are, a, means, of , producing] , vp) . 

synp(are, [are, due, to], vp).

synp (as, [as, a, result , of ] ,prep) . 

synp (attributable, [attributable, to] ,vp) . % ? 

synp (attributed, [attributed, to] ,ven) . 

synp (based, [based, on] ,ven) . 

synp (based, [based, upon] ,ven) . 

synp(be, [be, a, means, of , producing] , v) . 

synp (be, [be, due, to] ,v) . 

synp (because , [because, of] ,prep) . 

synp(been, [been, a, means , of , producing] , ven) . 

synp (been, [been, due, to] ,ven) . 

synp(being, [being, a, means , of , producing] , n) . 

synp(being, [being, a, means, of, producing], ving).
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wdef (pkc, protein, 'protein kinase C*). 
wdef(position, site, site).
wdef(positions, site, site).
wdef(protease, protein, protease).
wdef(ps1, protein, 'presenilin 1').
wdef(ps2, protein, 'presenilin 2').
wdef(rap1, protein, 'Rap1').
wdef(ras, protein, 'Ras').
wdef(receptors, substance, receptor).
wdef(rela, protein, 'RelA').
wdef(residues, substance, residue).
wdef(responsive, state, active).
wdef(s6, protein, 'S6').

wdef (selectively, constraint, selective). 
wdef(ser112, site, 'Ser112').
wdef(ser136, site, 'Ser136').
wdef(ser32, smallmolecule, 'Ser32').
phrase (psl, protein 

wdef(ser36, smallmolecule, 'Ser36').

phrase(ps1, protein, [ps1, '-', ctf], 'ps1-ctf', r).

wdef(sh2, domain, 'SH2').

wdef(sh3, domain, 'SH3').

wdef(shc, protein, 'Shc').

wdef(signalsome, complex, signalsome).

wdef (sites, site, site). 

wdef(sos, protein, 'Sos'). 

wdef (staurosporine, smallmolecule, staurosporine) . 

wdef(sts, smallmolecule, 'STS').

wdef(tcr, complex, 'T-cell receptor').

wdef(tetracycline, smallmolecule, tetracycline).

wdef(thr229, aminoacid, 'Thr229').

wdef(thr308, aminoacid, 'Thr308').

wdef(thr389, aminoacid, 'Thr389'). 

wdef (threonine, aminoacid, threonine) . 

wdef (tyrosine, aminoacid, tyrosine). 

wdef (unresponsive, state , inactive) . 

wdef (unstimulated, state, inactive). 

wdef (zvad, smallmolecule, 'zVAD') . 
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wdef (cpp32, protein, •CPP32 ' ) . 
wdef(crkl, protein, 'CrkL'). 

wdef (ctf, substance, ' COOH- terminal fragment') . 

wdef(cytokine, smallmolecule, cytokine).

wdef (cytosol , structure, cytosol) . 

wdef (djnk, protein, 'DJNK') . 

wdef(djun, protein, 'DJun'). 

wdef (dynamitin, protein, dynamitin) . 

wdef(erk, protein, 'ERK'). 

wdef(eto, smallmolecule, 'ETO').

wdef(etoposide, smallmolecule, etoposide).

wdef(fad, disease, 'familial Alzheimer''s disease').

wdef(fyn, protein, 'Fyn').

wdef(gdp, smallmolecule, 'GDP').

wdef(gelsolin, protein, gelsolin).

wdef(gp120, protein, 'gp120').

wdef(grb2, protein, 'Grb2').

wdef(gst, protein, 'glutathione S-transferase').

wdef(gtp, smallmolecule, 'GTP').

wdef(hsp70, protein, 'HSP70').

wdef(human, species, human).

wdef(ikk, protein, 'IKK').

wdef (inactivated, state, inactive). 

wdef ( inactive, state, inactive). 

wdef(jnk, protein, 'JNK'). 

wdef(jnk, protein, 'JNK'). 

wdef(jnk2, protein, 'JNK2').

wdef (kap3 , protein, kap3 ) . 

wdef(kdakt, protein, ' KDAkt ' ) . 

wdef (kinase, protein, kinase). 

wdef (kinectin, protein, kinectin) . 

wdef (klc , protein, klc) . 

wdef (lamin, protein, lamin) . 

wdef (myosins, protein, myosins) . 

wdef(nmdar, protein, 'NMDAR').

wdef (nmdar2b, protein, 'NMDAR2B'). 

wdef (ntf , substance, ' NH2 -terminal fragment ' ) . 

wdef (p70s6k, protein, p70s6k) . 

wdef(p78s6k, protein, p78s6k) . 

wdef (parp, protein, ' poly (ADP-ribose) polymerase ' ) . 
wdef(pdk1, protein, 'PDK1').
wdef(peptides, protein, peptide).
wdef (pkb, protein, 'PKB' ) . 
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phrase(t, cell, [t,'-',dr7], •t-DR7',r). 

phrase(t, cell, [t, '-', dr7, '/', b7, '-', 1], 't-DR7/B7-1', r).
phrase(t, cell, [t, cell], 'T cell', r).
phrase(t, cell, [t, cells], 'T cell', r).

phrase(t, complex, [t, cell, receptor], 'T-cell receptor', r).

phrase(t, cell, [t, '-', dr7, cells], 't-DR7 cells', r).

phrase(t, cell, [t, '-', dr7, '/', b7, '-', 1], 't-DR7/B7-1', r).

phrase(t, complex, [t, cell, antigen, receptor], 'T-cell antigen receptor', r).

phrase(threonine, aminoacid, [threonine, 229], 'threonine 229', r).

phrase(transcription, protein, [transcription, factor], 'transcription factor', r).

phrase(trypan, smallmolecule, [trypan, blue], 'trypan blue', r).

phrase(wt, protein, [wt, akt], 'WT Akt', r).

phrase(zap, protein, [zap, '-', 70], 'ZAP-70', r).

phrase(zdevd, smallmolecule, [zdevd, '-', fmk], 'zDEVD-fmk', r).

phrase(il, protein, [il, '-', 3], 'interleukin-3', r).

wdef(ab, complex, antibody). 

wdef(actin, protein, actin) . 

wdef (activated, state, active). 

wdef (active, state, active). 

wdef(ad, disease, 'Alzheimer''s disease').

wdef(agc, protein, 'AGC').

wdef (akt, protein, 'AKT'). 

wdef (anergic, state, inactive) . 

wdef (anergic , state , inactive) . 

wdef (anergy, state, inactive) . 

wdef (antibody, complex, antibody) . 

wdef (antigen, substance, antigen). 

wdef(aop, protein, 'Aop'). 

wdef (apoptosis , process , apoptosis) . 

wdef (bad, protein, 'BAD'). 

wdef(c3g, protein, 'C3G'). 

wdef ( ' ca2+ ' , smallmolecule, ' Ca2+ ' ) . 

wdef(cas, protein, 'Cas').

wdef (caspase, protein, caspase) . 

wdef (caspase, protein, caspase) . 

wdef(cbl, protein, ' Cbl ' ) . 

wdef (ccrsrh, protein, 'CCRSrh') . 

wdef(cd28, protein, 'CD28'). 

wdef (cells, structure, cell). 

wdef (cholesterol, smallmolecule, cholesterol) . 
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phosphorylate^r) . 

phrase(phosphatidylinositol, smallmolecule, [phosphatidylinositol, 1, ',', 4, ',', 5, '-', triphosphate], 'phosphatidylinositol 1,4,5-triphosphate', r).

phrase(phosphoinositide, protein, [phosphoinositide, '-', dependent, protein, kinase], 'PDK1', r).
phrase(phospholipase, protein, [phospholipase, c, '-', 1], 'phospholipase C-1', r).

phrase(poly, protein, [poly, '(', adp, '-', ribose, ')', polymerase], 'poly(ADP-ribose) polymerase', r).

phrase(polyvinylidene, structure, [polyvinylidene, difluoride, membranes], 'polyvinylidene difluoride membranes', r).
phrase(presenilin, protein, [presenilin, 1], 'presenilin 1', r).
phrase(presenilin, protein, [presenilin, 2], 'presenilin 2', r).
phrase(productively, state, [productively, stimulated], active, r).
phrase(protein, protein, [protein, tyrosine, kinase], 'protein tyrosine kinase', r).

phrase(protein, protein, [protein, kinase, c], 'protein kinase C', r).
phrase(ps2, substance, [ps2, '-', ctf], 'presenilin 2 COOH-terminal fragment', r).

phrase(ps2, substance, [ps2, cleavage, fragment], 'presenilin 2 cleavage fragment', r).

phrase(pvdf, structure, [pvdf, membranes], 'polyvinylidene difluoride membranes', r).

phrase(raf, protein, [raf, '-', 1], 'Raf-1', r).
phrase(raf, protein, [raf, '-', 1], 'Raf-1', r).
phrase(rap1, complex, [rap1, '-', gtp], 'Rap1-GTP', r).
phrase(requirement, need2, [requirement, for], need, r).
phrase(ser, smallmolecule, [ser, 19], 'Ser 19', r).
phrase(ser, smallmolecule, [ser, 23], 'Ser 23', r).

phrase(serine, substance, [serine, residues], 'serine residues', r).

phrase(src, domain, [src, homology, 2], 'Src homology 2', r).
phrase(src, domain, [src, homology, 3], 'Src homology 3', r).
phrase(srebp, protein, [srebp, '-', 1], 'sterol-regulatory element binding protein 1', r).

phrase(srebp, protein, [srebp, '-', 2], 'sterol-regulatory element binding protein 2', r).

phrase(sterol, protein, [sterol, '-', regulatory, element, binding, protein, 1], 'sterol-regulatory element binding protein 1', r).
phrase(sterol, protein, [sterol, '-', regulatory, element, binding, protein, 2], 'sterol-regulatory element binding protein 2', r).
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phrase 
phrase 
) . 

phrase 
phrase 
phrase 
phrase 
) . 

phrase 

phrase 

^r) . 

phrase 

me] , ' 

phrase 

phrase 

phrase 

phrase 

phrase 

phrase 

phrase 

phrase 

phrase 

phrase 

phrase 

phrase 

phrase 

MDAR' , 

phrase 

phrase 

phrase 

phrase 

phrase 

agment 

phrase 

, r) . 

phrase 

,r) . 

phrase 

phrase 

, r) . 

phrase 

phrase 



ice, protein, [ice, ' / ' , ced, ' - ' , 3] , ' ICE/Ced-3 ' , r) , 

il, gene, [il , ' - ' , 2 , gene] , 'gene encoding interleulcin-2 • , 

il, protein, [il,»-',2], • interleulcin-2 ' , r) . 

in, interm, fin, the, case, of] , [] , r) . 

in, state, [in, the , anergic , state] , inactive, r) . 

inducible, cell, [inducible , h4 , cell] , 'inducible H4 cell' 



interleukin, protein, [interleukin, ' - ♦ 
interleukin, protein, [interleukin, » - ' 



2] ,r) . 
3] , * interleukin-3 



interleukin, protein, [interleukin, ,1 , beta, converting, enzy 

nterleukin-1 beta converting enzyme r) . 

jurkat, cell, [jurkat, cell], ' Jurkat cell', r) . 

jurkat, cell, [jurkat^ cells], 'Jurkat cell', r) . . 

kif 3a, protein, [kif3a, • / ' , 3 , b] , 'KIF3A/3B' ,r) . 

Ibl, cell, [Ibl, • ,drf , cells], ' LBL-DR7 cells ', r) . 

lbl,cell, [Ibl, • - ' ,dr7, cells] , 'LBL-DR? cells' ,r) . 

let, protein, [let , ' - ' , 23] , 'Let-23', r) . 

may, probability, [may, be] , possible, r) . 



myc, protein, [myc, 
myc, protein, [myc, 
myc, protein, [myc, ' - ' 
myc, protein, [myc, 



- ' , p70s6kd3e] , ' Myc -p70s6kD3E ' , r) . 
-', pdkl] , 'Myc-PDKl ' ,r) . 
p70s6k] , 'Myc-p70s6k' , r) • 
p70s6ke389d3e] , ' Myc-p70s6kE389D3E » , r) 



myr, protein, [myr, akt] , ' Myr-Akt • , r) . 

n, protein, [n, ' - ' , methyl, ' - ' ,d, ' - ' , aspartate, receptor] , 'N 
r) . 

n, protein, [n, methyl d, aspartate] , 'NMDA'). 

native, cell, [native, h4 , cell] ,' native H4 cell',r). 

nf, protein, [nf ,'-',»[', kappa, ']', b] , 'NF- [kappa] B », r) , 

nh2, site, [nh2 terminal ] , ' NH2 - terminal r) . 

nh2 , substance, [nh2 terminal , fragment] , ' NH2 -terminal fr 

,r) . 

nih, cell, [nih, ,3, 1 3, fibroblasts] , •NIH-3T3 fibroblasts' 

nih,cell, [nih, ' - ' , • 3t3 ' , fibroblasts] , »NIH-3T3 fibroblasts ' 

normal, sxibstance, [normal, ntf] , 'normal NTF' , r) . 

nuclear, protein, [nuclear, factor, kappa, b) ,' NP- [kappa] B ' 

plSOGlued, protein, [plSOGlued, - , arpl] , ' pl50Glued-Arpl ' , r) . 
phosphate, phosphorylate2 , [phosphate, incorporated, into] , 
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phrase (caspase, protein, [caspase, ' - ' , 6] , 'caspase-6 ' , r) . 

phrase (caspase , protein, [caspase, ' - ' , 7] , 'caspase-7 ' , r) . 

phrase(catalytic, domain, [catalytic, domain], 'catalytic domain', r).

phrase(cleavage, site, [cleavage, site], 'cleavage site', r).

phrase(cleavage, substance, [cleavage, products], 'cleavage products', r).

phrase(cooh, substance, [cooh, '-', terminal, fragment], 'COOH-terminal fragment', r).

phrase(crk, protein, [crk, proteins], 'crk proteins', r).

phrase(crkl, complex, [crkl, '-', c3g, complex], 'crkl-c3g complex', r).

phrase(dcp, protein, [dcp, '-', 1], 'DCP-1', r).

phrase (did, negation, [did, not], not, r) . 

phrase (ebv, species , ' Epstein-Barr virus ' , r) . 

phrase(epstein, species, [epstein, '-', barr, virus], 'Epstein-Barr virus', r).

phrase(familial, disease, [familial, alzheimer, '''', s, disease], 'familial Alzheimer''s disease', r).

phrase(gene, gene, [gene, encoding, interleukin, '-', 2], 'gene encoding interleukin-2', r).

phrase(gst, protein, [gst, '-', fyn, '-', sh2], 'GST-Fyn-SH2', r).
phrase(gst, protein, [gst, '-', fyn, '-', sh3], 'GST-Fyn-SH3', r).
phrase(gtp, complex, [gtp, exchange, of, rap1], 'GTP exchange of Rap1', r).

phrase (guanidine, protein, [guanidine , nucleotide, *-' , releasing, fac 
tor,c3g] , 'guanidine nucleotide -releasing factor ,r) . 

phrase (guanidine, smallmolecule , [guanidine, nucleotide] , 'guanidine 
nucleotide' ,r) . 

phrase(guanosine, smallmolecule, [guanosine, triphosphate], 'guanosine triphosphate', r).

phrase(guanosine, smallmolecule, [guanosine, diphosphate], 'guanosine diphosphate', r).

phrase(h4, cell, [h4, cell, line], 'H4 cell line', r).

phrase (h4 , cell , [h4 , human, neuroglioma , cells] , ' H4 , human, neuroglioma 
, cells • , r) . 

phrase(ha, protein, [ha, '-', '[', delta, ']', phpkb], 'HA-[Delta]PHPKB', r).

phrase(hla, protein, [hla, '-', dr7], 'HLA-DR7', r).

phrase(i, protein, [i, '[', kappa, ']', b, '-', '[', beta, ']'], 'I[kappa]B-[beta]', r).

phrase(i, protein, [i, '[', kappa, ']', b, '-', '[', alpha, ']'], 'I[kappa]B-[alpha]', r).

phrase(i, protein, [i, '[', kappa, ']', b], 'I[kappa]B', r).
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% lexsemsub.pl 

% lexsemsub . pat 

% revised March 17, 2000 

% LEXICON OF SUBSTANCES AND STRUCTURES 

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
:- multifile(phrase/5).
:- multifile(wdef/3).
:- unknown(_, fail).

phrase ( ' [ ' , protein, [ ' [ ' , gamma aminobutyric , acid, a] , ' GA 
BAA' , r) . % ? 

phrase ('[', smallmolecule , [ * [ ' , zeta, ' ] ' , 1 , subunit] , • [zeta] 1 subu 
nit ' , r) . % ? 

phrase(116, protein, [116, '-', kd, fyn, '-', associated, protein], '116-kD Fyn-associated protein', r).

phrase(116, protein, [116, kd, protein], '116-kd protein', r).

phrase(3, protein, [3, '-', kinase, '-', akt], '3-kinase-Akt', r).

phrase(ability, affirmation, [ability, to], [], r).

phrase(agc, protein, [agc, protein, kinases], 'AGC', r).

phrase(akt, protein, [akt, mutant], 'Akt mutant', r).

phrase(alternative, substance, [alternative, ntf], 'alternative NTF', r).

phrase(antibody, protein, [antibody, to, phosphotyrosine], 'anti-phosphotyrosine', r).

phrase(antigen, complex, [antigen, receptor], 'antigen receptor', r).
phrase(ap, protein, [ap, '-', 1], 'AP-1', r).

phrase(aspargine, site, [aspargine, '-', 141], 'aspargine-141', r).
phrase(b, cell, [b, cell], 'B cell', r).
phrase(b, cell, [b, cells], 'B cell', r).

phrase(b, species, [b, lymphoblastoid, cells], 'B lymphoblastoid cells', r).

phrase(b, cell, [b, lymphoblastoid, cells], 'B lymphoblastoid cells', r).

phrase(b7, protein, [b7, '-', '1'], 'B7-1', r).
phrase(bcl, protein, [bcl, '-', 2], 'Bcl-2', r).
phrase(c, protein, [c, '-', jun], 'c-Jun', r).
phrase (camk, protein, [camk, iv] , ' CaMK IV', r). 
phrase (casp, protein, [casp 3 ] , ' caspase-3 • , r) . 

phrase(caspase, protein, [caspase, '-', 3, family, protease], 'caspase-3 family protease', r).
phrase(caspase, protein, [caspase, '-', 3, precursor], 'caspase-3 precursor', r).

phrase(caspase, protein, [caspase, '-', 3], 'caspase-3', r).
phrase(caspase, protein, [caspase, 3], 'caspase-3', r).

l]f>(>Phdi}C A 
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100 1 gip506888|MADm 

1 gip29S78|MM)h 

gt|2782362IMa<Mh 



A 

)l — old 



1 00 ' 0P1 37499|MacJ4m 

flipl37498|Mad3m 



100 



lOOj gi|2506888|MADm 

"I gi|729978|MAD 



- gi[2792362|Mad4 
100^ gi|2137499|M8d4m 
-gi)2137498|Mad3m 
Mad3h Putative 
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>AfiiiDh(PutatJVc) 

MEPlASNIQAn-LQAAEFLERREREAEHGYASLCPHRSPGPlHRRKKRPPQAPGAQDSGRSVHNELEKWWV^ 

RCLERLKQQMPLGGDCARYTTLSLLRRARMHIOKLEDQEQRARQLKEPa^RTKQQSLQRXWMQLRGlAGAAERER 

LRADSLD5SGL55£RSD5Z)0E£LEVDVE;SLVFGGEA£LLRGFVAG0EHSYSHVGGAHL 



gi 1 250688ejnAtlB KATA.«1II101.U:-«AADY!.EMlE«AEIIGrASMLPYS-KE»MrifWUIKPI^ 

9i 1729978 IHAOh HJUUkVR»JII0t1LUAADVU:R»RLAEHSyMKLPVnfK3R&KUC1Ulllk51QCNMS--S5H^ 

«i 1 21»2>62 I Mth — MEUiSLLI LLEAACYLEftftORtAEHCYASVLPrDCDf AR£irrKKACl.VMUtf-*10IMSIII«CLElCMM^^ 

91 12131*99 IHMltB K£lJlSLlXLL£AAtVLERWUACHSYMKLProeSFAWa(TKTAGLVMCGP->iniR5SHJtELEKMI^^ 

91 1 2 1 3'>« 9B f MadlB -HEP OVLtOAAErLEMCKEACICVAS lX?HHSPG7VCMfUCPPl«MKiAU)SCI^>QOfELEKIUUU«LICRC1^0I.RaaH^ UL 

n«33t> Putailv* >MEPULSIII<irVLl.OM7LSR»R£AEBSYASlXPIWSPGPIil]UUOCIlPPOA»CA0I»C«SV^ 

«i l2!>0«Bea IHASb TKAKLMI IOCUa)C0WWVMOlP0U)«OBIUjn>,lli.tirLSAg|tTO KDSVC*5WS5SR5DSOU£U)\«N''DVDS.ir/ir/SCtir:iJCOUMSSS- 

9i 1 729978 l>lM>b TXAKLHIJOCLEDCDMCAVHOIDOLOUOMlUniOLClCLClSAJIi KDSIO-ST^'SSEIlSOiSDRS — «->IJOWSVS5TDyLTCDU9W55SS 

91 1 3793)62 iHAdlh ICRWC-/aiI0Cl.BE00RWLLSXICE01O0EimrLXRIlLt0LSVQSVm VltTDSn*SAVSTD— DSSQE — VDISCatEfCPGELOSVCS- 

9i I 2137199 IHadla K-MCHMIfOCLEEQOMlALSXICEQi^UHM^UnUlLEQLSVOSVR VKTDSTC^SAVSTD—DSEQE— V1>tECKErOPCCU)SAG$> 

91 1 2 1 374 98 IMad2k» 8.AAVK10KI.EEOCOO^MXKEKl.R5KQOS LDO0LEQL0Gl.PGJ)JlEREiaiWaSU>SSCL5SEftSI)SD(^-*-^---<41f «1>^^1'L^ 

n«d3h Pul«liv« RRAHKB: flpClJPOEO»A»OLXEIL. WKQQS LOMtWHOLRSlAGMvEltfiaMOS LD5SCI.SSERSDSD0E-*— *— ELT.Ty^ LVFG-CSMUJICr 



9it2&068BeiMACto 
fftT»29978J»«nh 
9112792362 IH«d«b 
9ll2137«99ins(li» 
91I2137498II 



VSDSDEItCSNOS U^SDECY5SAr»lCIUUaQ0GIDCM#U;'^ 
VSDSOZttaSHOSU^SOaSYSSrSfilklKlJOpSEKhCiJGl 

SS DADOKYS UJSGTOSOSeFCraCMLeWALS— - 

SSI>ADDHYSU)S86eS0SSYGIIKNlKC»GI.S-*<^-~ 
SACAEUSrSRSTCAin. — 
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BASE COUNT 130 a 234 c 258 g 106 t 5 others 

ORIGIN 

1 cagccgcttg ctccggccgg caccctaggc cgcagtccgc caggctgtcg ccgacatgga 
61 acccttggcc agcaacatcc aggtcctgct gcaggcggcc gagttcctgg agcgccgtga 
121 gagagaggcc gagcatggtt atgcgtccct gtgcccgcat cgcagtccag gccccatcca 
181 caggaggaag aagcgacccc cccaggctcc tggcgcgcag gacagcgggc ggtcagtgca 
241 caatgaactg gagaagcgca ggagggccca gttgaagcgg tgcctggagc ggctgaagca 
301 gcagatgccc ctgggcggcg actgtgcccg gtacaccacg ctgagcctgc tgcgccgtgc 
361 caggatgcac atccagaagc tggaggatca ggagcagcgg gcccgacagc tcaaggagag 
421 gctgcgcaca aagcagcaga gcctgcagcg gcantggatg cagctccggg ggctggcagg 
481 ngcggccgag cgggagcgnc tgcgggcgga cagtctggac tcctcaggcc tctcctctga 
541 gcgctcagac tcagaccaag aggagctgga ggtggatgtg gagagcctgg tgtttggggg 
601 tgaggccgag ctgctgcggg gcttcgtcgc cggccaggag cacagctact cgcacgncgg 
661 cggcgcctgg <:tatgatgtt cctcacccan ggcgggcctc tgccctctta ctcgttgccc 
721 aagcccactt tnc 
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TBXABTK 2.0.e {iJan-OS-109«I 

Reference: 

Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schäffer, 
Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), 
"Gapped BLAST and PSI-BLAST: a new generation of protein database search 
programs". Nucleic Acids Res. 25:3389-3402. 

Ouery- gi l213T4 98|Mad3m 
(205 letters) 

gb|AA27e224 1^216224 xslleOS.rl MCI 06AP GCBl Hono sapiens cONA clone XMAGE:703&20 S* 
similar to TII;G11B4151 61ie41S1 MAX-XHTERACTZNG 
TRANSCRXerXONAX. REPRESSOR. ; 
Length '■430 

Score - 209 bits (526), Expect - le-53 

Identities - 104/124 (e3i)* Positives - 116/124 1 921), Gaps - 1/124 (0%) 
frame - +2 

Query: 1 MEPVASNZQVI.L0^ACrLCRREREAEKGYAStJCPHKSPGTVCRRRKPPl.QAP6ALNSGRS 60 

K£P-»ASNZQVLLOAA£rLERRER£AEHGYASLCPH SPG * RRi-K P (2APGA -t^SGRS 
Sbjct: 56 MCPlASNZOVLLaAA£FZ.CRKER£A£H6YASLCPKR5PGFlHRRKKRPPOftPGAQ05GRS 235 

Query: 61 VHNEIXmRRAQLKRClXQLRQQMPI^DCTRYTTLSLt-RARVHlQKLEEQEQQMUULK 119 

VKHELEKRRRAQUOtCLE't'Li^QaMPLG DC RYTTLSLL RAR-^HZQKLE•»•Q£(HAR•«-LK 
Sbjct: 236 \mK£IXKKRRAQLKRCLERU<QQHPLGGOCARYTTLSLLRRAKHKZQKLEOQEQRARQLK 415 

Query: 120 EKLRS 124 
E'^tR4 

Sbjct: 416 ERLRT 430 

db^ IC02407 tC02407 KUMGS0012279« Human Gene Signature, 3'-dlrected cDHA sequence, 
length "348 

Score - 97.5 bits (239)* Expect - ee-20 
IdentitLies - 51/63 (B0«), Positives • 56/63 (81%) 
Frame - +3 

Query: 125 KQQSLOQOLEQLQGLPGARERCRLRAOSLDSSGLSSERSDSDQEOLEVDVENLVrGTETE 164 

KQQSLQ+ QL<t-6L GA ER£RLRADSLDSSGLSSERSD5DQE<f>LEVDV£4>LVPG E E 
Sbjct: 45 KQQSLQRXHMQLRGLAGAAERERLRAOSLDSSGLSSERSDSDQEELEVDVESLVFGGEAE 224 

Query: 185 IdJQ 187 

Sbjct: 225 IXA 233 
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Activated CD4^T.ceMs 
Rpt1 (represses expression of IL-2 receptor) 



1 

1 



IL-2 receptor ^ normal expression of Bcl2 



IL.2,IL-15 

normal apoptosis 



When rpt1 is knocked out: 

IL.2, IL.15 

I 

|1 IL-2 receptoi ► overexpression of Bcl2 

apoptosis 




Figure 16 
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>sp|P155331RPTl„MOUSE DOWN REGULATORY PROTEIN 
OF INTERLEUKIN 2 RECEPTOR (J03776) rpt-lr [Mus 
musculus] Length = 353 

Score = 92.0 bits (237), Expect = 6e-20 

Query 194 VMELIXSDLTCPICCSLFDDPRVLPCSKNPCKKCLEGZIfGSVIWSMiiniPAPFKCPTC 373 

V+E+++E++TCPIC L -fP C+H+PC+ C-»" B S RN+ CP CR 

Sbjct 5 >a*lWlKEEVTCPlCLELLKEPVSAIX:NHSFCRACITLNyE-SNRNT DGWa«CPVCRV 60 

Query 374 ETSATGINSLQVNYSLKGIVEKYNKIKISP KMPVCKOTMGQPLNIFCLTDMQLICG 541 

" +L+ N + IVi;+ K P K+ +C H G+ L +FC DM +IC 
Sbjct 61 PyP FGNLRPNLHVWrVERIJCGFKSIPEEEQKVNICAQH-aEKLKLFCRKDMMVICW 116 

Qu«ry S42 ICATRGEHTKHNTPCSIEDAYAOERIAFESLFOSF ETHRRGDALSIOJmiETSK 700 

•►C EH H IB* + ++ + + W+ L R-J-D 
Sb3Ct 117 U:ERSQEHRGHQTJU.lEEVX>OEyKEKLQGia^na>a<KAiaCDEWODDliQLQP^^ 171 

Query 701 RKSLQlWWSSDKNnCEFFEKX^HTLDQKSaJEIl^DFETMiaAVMOAYDPEINKL 862 

+Q* + + V+ F+ L+ ID K+NE L + K VM+ N+L 
Sbjct 172 ENQIC2 m^NVQRQFKGIJU>LII)SKEN£EIiQ)aJCKEKKEVKEKLE£5EN£L 222 



Homology covers ring finger, B-box and the beginning of coiled coil domain 
in the CLL ring finger protein 
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o 10 

1 SRSXQKFFQELSK 

31 L L T A F EFGNAGAV 

61 QQKIIPVVVKMFS 

91 IQYLDEPTVNTQI 

121 QTVKSMLLLAPKL 

151 AKDEQGPIRCNTT 

181 VLTSAFSRATRDP 

211 LYSMNDCAQKILP 

241 FKAXRSFLSKLES 

271 ASSPGMGGA.AASW 
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BASE COUNT 4 05 a 545 c 4 93 g 278 t 6 others 

ORIGIN 

1 cagccgaagc amgcaaaaat tcttccagga gctgagcaag agcctggacg catticcctga 
61 ggayttctgt cggcacaagg tgctgcccca gctgctgacc gccttcgagt tcggcaatgc 
121 tggggccgtt gtcctcacgc ccctcttcaa ggtgggcaag ttcctgagcg ctgaggagta 
181 tcagcagaag atcatccctg tggtggtcaa gatgttctca tccactgacc gggccatgcg 
241 catccgccrcc ctgcagcaga tggagcagtt catccagtac cttgacgagc caacagtcaa 
301 cacccagatc ttcccccacg tcgtacatgg cttcctggac accaaccctg ccatccggga 
361 gcagacggtc aagtccatgc tgctcctggc cccaaagctg aacgaggcca acctcaatgt 
421 ggagctgatg aagcactttg cacggctaca ggccaaggat gaacagggcc ccatccgctg 
481 caacaccaca gtctgcctgg gcaaaatcgg ctcctacctc agtgctagca ccagacacag 
541 ggtccttacc tctgccttca gccgagccac tagggacccg tttgcaccgt cccgggttgc 
601 gggtgtcctg ggctttgctg ccacccacaa cctctactca atgaacgact gtgcccagaa 
661 gatcctgcct gtgctctgcg gtctcactgt agatcctgag aaa^ccgtgc gagaccaggc 
721 ctt.caaggc9i wttcggagct tcctgtccaa attggagtct gtgtcggagg acccgaccca 
781 gctggaggaa gtggagaagg atgtccatgc agcctccagc cctggcatgg gaggagccgc 
841 agctagctgg gcaggctggg cgtgaccggg gtctcctcac tcacctccaa gctgatccgt 
901 tcgcacccaa ccactgcccc aacagaaacc aacattcccc aaagacccac gcctgaagga 
961 gttcctgccc cagcccccac ccctgttcct gccaccccta caacctcagg ccactgggag 
1021 acgcaggagg aggacaagga cacagcagag gacagcagca ctgctgacag atgggacgac 
1081 gaagactggg gcagcctgga gcaggaggcc gagtctgtgc tggcccagca ggacgactgg 
1141 agcaccgggg gccaagtgag ccgtgctagt caggtcagca actcbgacca caaatcctcc 
1201 aaatccccag agtccgactg gagcagctgg gaarctgagg gctcctggga acagggctgg 
1261 caggagccaa gctcccagga gccacctyct gacggtacac ggctggccag cgagtataac 
1321 tggggtggcc cagagtccag cgacaagggc gaccccttcg ctaccctgtc tgcacgtccc 
1381 agcacccagc cgaggccaga ctcttggggt gaggacaact gggagggcct cgagactgac 
1441 agtcgacagg tcaaggctga gctggcccgg aagaagcgcg aggagcggcg gcgggagatg 
1501 gaggccaaac gcgccgagag gaaggtgcca agggccccat gaagctggga gcccggaagc 
1561 tggactgaac cgtggcggtg gcccttcccg gctgcggaga gcccgcccca cagatgtatt 
1621 tattgtacaa accatgtgag cccggccgcc cagccaggcc atctcacgtg tacataatca 
1681 gagccacaat aaattctatt tcacaaaaaa aaaaaaaaaa aaaaaaa 
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100 



-H.sapiens 



85 



0.15 



- C.elegans_e1350092 

S.pofnbe_013733 

S.cerv«si8e_S60992 

Nico1iana_tabacum_e244568 
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>gl I2210766I9^IAA481214 IAA481214 aa34e02.rl NCI_CGAP_GCB1 Homo sapiens cDNA clone 

IMAGE: 615162 S* aimXlar to HP:HO'7A12.4 CZ03795 7r mRNA aequencc [Homo sapiens] 

CATGGCTTCCTGGACACCAACCCTGCCy^TCCGGGAGCAGACGGTCAAGTCCATGCTGCTCCTGGCCCCAA 

A6CTGAACGAGGa:AACCTCAATGTGGAGCTGATGAA6CACTTTGCACGGCTACAGGCCAA^ 

GGGCCCCATCCGCTGCAACACCACAGTCTGCCTGGGCAAAATCGGCTCCTACCTCAGTGCrA^^ 

CACAGGGTCCTTACCTCTGCCTTCAGCCGAGCCACTAGGGACCCGTTTGCACCGTCCCGGGTTGCGGGTG 

TCCTGGGCTTTGCTGCCACCCACAACCTCTACTCAATGAACGACTGTGCCCAGAAGATCCT6CCTGTGCT 

CTGCGGTCTCACTGTAGATCCTGAGAAATCCGTGCGAGACCAGGCCTTCAAGGCA 



>gl|i34 921Xigb|W51957iWS19S7 zc45f01.rl Soares^senescent.flbroblasts^NbHSF Homo 

sapiens cDNA clone IMAGE: 325273 S*, tnRNA sequence [Homo sapiens) 

CCTTCGAGTTCGGCAATSCTGGGGCCGTTGTCCTCACGCCCCTCTTCAAGGTGGGCAAGTrCCTGAGCGC 

TGAGGAGTATCAGCAGAAGATCATCCCTGTGGTGGTCAAGATGTTCTCATCCACTGACCGGGCCATGCGC 

ATCCGNCTCCTGCAGCAGATGGAGCAGTTCATCaVGTACCTTGACGAGCCAACAGTCAACACCCAGATCT 

TCCCCGACGTCGTACATGGCTTCCTGGACACCAACCCTGCCATCCGGGAGCAfiACGGTCAAGrCCATGCT 

GCTCCTGGCCCCAAAGCTGAACGAGGCCAACCTCAATGTGGAGCTGATGAAGCACTTTGCACGGCTACAG 

GCOVAGGATGAACAGGGCCCCATCCGCTGCAACACCACAGTCTGCCTGGGCAAAATOKSCrCCTACCrCA 

GTGCTAGCACCAGMACAGGGTCCTTACCTCTG 
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3 1 . The system according to claim 22, vydierein said error recovery 
means comprises: 

means for segmenting the text data; and 

means for analyzing the segmented text data to achieve at least a partial 
5 parsing of the unsuccessfully parsed text data. 

32. The system according to claim 22, wherein said tagging means 
comprises means for providing the structured data component in a Standard 
Generalized Markup Language (SGML) compatible format. 
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24. The system according to claim 22, further comprising means for 
referring to an additional parameter which is indicative of the degree to which 
subphrase parsing is to be carried out. 

25. The system according to claim 22, wherein said parsing means 
5 further comprises means for segmenting the text data by sentences. 

26. The system according to claim 22, wherein said parsing means 
further comprises: 

means for segmenting the text data by sentences; and 

means for segmenting each of the sentences at identified words or 

10 phrases. 

27. The system according to claim 22, wherein said parsing means 
further comprises: 

means for segmenting the text data by sentences; and 
means for segmenting each of the sentences at a prefix. 

1 5 28. The system according to claim 22, wherein said parsing means 

further comprises means for skipping undefined words. 

29. The system according to claim 22, wherein said parsing means 
further comprises: 

means for identifying one or more binary actions and their relationships; 

20 and 

means for identifying one or more arguments associated with the actions. 

30. The system according to claim 22, further comprising means for 
performing error recovery when parsing of the text data is unsuccessful. 
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17. The method according to claim 1 1 , wherein said parsing step 
further comprises skipping undefined words. 

18. The method according to claim 11, wherein said parsing step 
further comprises: 

5 identifying one or more binary actions and their relationships; and 

identifying one or more arguments associated with the actions. 

19. The method according to claim 11, further comprising 
performing error recovery when parsing of the text data is unsuccessful. 

20. The method according to claim 19, wherein said error recovery 
10 step comprises: 

segmenting the text data; and 

analyzing the segmented text data to achieve at least a partial parsing of 
the unsuccessfully parsed text data. 

21. The method according to claim 11, wherein said tagging step 

1 5 comprises providing the structured data component in a Standard Generalized Markup 
Language (SGML) compatible format. 

22. A computer system for extracting information on biological 
entities from natural-language text data, comprising: 

(i) means for parsing the natural-language text data; and 
20 (ii) means for regularizing the parsed text data to form structured 

word terms. 

23. The system according to claim 22, further comprising means for 
preprocessing the data prior to parsing, with the preprocessing means comprising 

25 identifying biological entities. 
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1 0. The method of claim 9 further comprising using each identified 
expression sequence tag to search sequence databases for 
overlapping sequences for the purpose of assembling longer 
overlapping stretches of DNA. 

5 

11. A method for extracting information on interactions between 
biological entities from natural-language text data, comprising: 

(i) parsing the text data to determine the grammatical structure of 
the text data; and 

1 0 (ii) regularizing the parsed text data to form structured word terms. 

12. The method according to claim 11, further comprising 
preprocessing the data prior to parsing, with preprocessing comprising the step of 
identifying biological entities. 

13. The method according to claim 11, further comprising referring 
15 to an additional parameter which is indicative of the degree to which subphrase 

parsing is to be carried out. 

14. The method according to claim 11, wherein said parsing step 
further comprises segmenting the text data by sentences. 

15. The method according to claim 11, wherein said parsing step 
20 further comprises: 

segmenting the text data by sentences; and 

segmenting each of the sentences at identified words or phrases. 

16. The method according to claim 11, wherein said parsing step 
further comprises: 

25 segmenting the text data by sentences; and 

segmenting each of the sentences at a prefix. 
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(v) imputing the species tree and gene tree into an algorithm 
which integrates the species tree and the gene tree into a 
reconciled tree; and 

(vi) identifying orthologous genes present in one species but 
5 missing in another. 

8. The method of claim 7 wherein the following algorithm is used 
to integrate the species tree and the gene tree into a reconciled tree: 

(i) computing the similarity $\sigma(S_{gi}, S_{sj})$ for each pair of 
interior nodes from trees $T_g$ and $T_s$, 
1 0 (ii) finding the maximum $\sigma(S_{gi}, S_{sj})$; 

(iii) saving $S_{gi}$ as a new cluster of orthologs, save $\{S_{gi}\} - \{S_{sj}\}$ 
as a set of species that are likely to have gene of this kind 
(or lost it in evolution); 

(iv) eliminating $S_{gi}$ from $T_g$: $T_g := T_g \setminus S_{gi}$; 

1 5 (v) repeating step (ii)-(iv) until Tg is non-empty. 



9. A method for identifying a novel gene comprising the following 
steps: 



(i) defining a motif or domain composition of a gene of 
interest; 

20 (ii) searching for sequences which correspond to nucleotide 

sequences in an expression sequence tag database or other 
cDNA databases using a program such as BLAST and 
retrieving the identified sequences; 

(iii) searching additional databases for expressed sequence 
25 tags containing the domains and motifs characteristic for 

the gene of interest with Hidden Markov Model of 
domains and motifs identified in step (i); 

(iv) identifying nucleotide sequences comprising the gene of 
interest. 
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4. The method of claim 1 wherein the regulatory pathway is involved 

in apoptosis. 

5. The method of claim 1 wherein the specific protein from the first 
5 species is involved in tumor suppression. 

6. A method for identifying the effect of a gene knockout on a 
regulatory pathway comprising the following steps: 

(i) identification of the shortest non-oriented pathway 
connecting two gene products; 
1 0 (ii) assigning an initial sign value of "-" to the knockout since 

the knockout gene product is inactive; 
(iii) moving along the shortest pathway between the two gene 
products multiplying the sign with the sign of the next 
gene product in the pathway, wherein "-" stands for 
1 5 inhibition, "+" stands for induction or activation, and "0" 

stands for the lack of interaction between two proteins in 
the specified direction; and 
(iv) determining the final sign at the end of the pathway, 
wherein "-" indicates inhibition and "+" indicates 
20 induction or activation of the pathway. 

7. A method for identifying a novel nucleic acid molecule encoding 
a protein of interest comprising: 

(i) selecting a gene of interest and searching a database for 
homologous sequences; 
25 (ii) aligning the homologous sequences identified in step (i); 

(iii) constructing a gene tree using the sequence alignment; 

(iv) constructing a species tree; 
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CLAIMS 

1 . A method for identifying a novel nucleic acid molecule encoding 

a protein of interest comprising: 

(i) selecting a specific protein from a first species involved 
5 in a regulatory network of interest; 

(ii) identifying known proteins that act upstream and 
downstream in the regulatory network of interest with 
respect to the specific protein selected; 

(iii) constructing the regulatory network of interest from the 
1 0 proteins identified in step (ii); 

(iv) for each identified protein, select a domain or motif and 
search by homology for related proteins in a second 
species, wherein a related protein is defined as a protein 
having a homologous domain or motif; 

1 5 (v) producing a regulatory network for the second species, 

wherein said regulatory network incorporates the 

identified related proteins; 
(vi) comparing the regulatory network from the first species to 

the regulatory network of said second species; 
20 (v) identifying a protein present in a regulatory network for 

one species but absent in the regulatory network of the 

other species; and 
(vi) isolating a nucleic acid molecule encoding the protein 

identified in step (v) in the species in which it is missing. 

25 2. The method of Claim 1 wherein the nucleic acid molecule encodes 

human protein. 

3. The method of claim 1 wherein the related proteins are orthologs. 
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WI). The nucleotide sequence of the human MatB gene is presented in Figure 1 7B. The 
deduced amino acid sequence of the gene is presented in Figure 17C. The translated 
sequence consists of 206 amino acid residues 81% of which are identical to mouse Mad3 
protein. The alignment of human and mouse Mad3 proteins shown below was made using 
5 BLAST server at NCBI and is presented in Figure 17C. 

Multiple alignment of the new sequence with sequences of known Mad 
proteins was made using Clustalw and viewed with the HitViewer. A gene tree was 
computed from this alignment using NJBOOT. Multiple alignment of the new 
sequence with sequences of known Mad proteins (Figure 17C) along with its position 
10 on gene tree (Figure 18B) shows that this new human gene found by the approach 
described above belongs to the family of Mad proteins and is the ortholog of mouse 
Mad3. 

The present invention is not to be limited in scope by the specific 
embodiments described herein, which are intended as single illustrations of individual 

1 5 aspects of the invention, and functionally equivalent methods and components are 
within the scope of the invention. Indeed, various modifications of the invention, in 
addition to those shown and described herein will become apparent to those skilled in 
the art from the foregoing description and accompanying drawings. Such 
modifications are intended to fall within the scope of the appended claims. 

20 Various publications are cited herein, the contents of which are hereby 

incorporated by reference in their entireties. 
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The gene tree shown in the Figure 20 was constructed in the following way. The 
protein sequences of known members of Mad gene family were extracted from 
GenBank database using NCBI Entrez keyword searches. The extracted sequences 
were aligned using multiple alignment program Clustalw running on Sun SPARC 
5 station. The quality of the multiple alignment was checked using program HitViewer 
Iterate (A. Rzhetsky, available upon request) and the redundant, non-homologous 
sequences as well as distant homologs from S. cerevisiae, C. elegans, D. 
melanogaster etc. were removed from the alignment. The refined set of sequences was 
realigned with Clustalw and a gene tree as presented in Figure 15A was computed 

10 from the alignment using program NJBOOT (http://genome6.cpmc.columbia.edu/~andrey) 
running on Sun SPARC station and viewed with program TreeView 
(http://genome6.cpmc.columbia.edu/~andrey). 

The tree presented in Fig. 19A clearly shows the relationships between 
three known mouse genes and their two human homologs. Attempts to find a missing 

1 5 human ortholog of the mouse Mad3 gene in protein non-redundant database at NCBI 
using BLAST search did not identify any human homologs other than sequences that 
were already present on the tree, confirming the absence of a known human ortholog 
for Mad3 protein in the database. 

In order to identify a human ortholog of the Mad3 protein, a human 

20 dbEST at NCBI was searched with program TBLASTN using Mad3 protein sequence 
as a query. Two EST were identified and are shown in Figure 17 A. 

Due to the nature of dbEST database this search produced only partial 
sequences of potential candidate genes. To obtain complete coding sequences 
(complete cds) of the genes, a search was conducted to obtain overlapping sequences 

25 in dbEST. The search for overlapping sequences was performed using the program 

Iterate with EST zs77e55.rl (gb|AA278224) serving as a query. The search returned a 
single overlapping sequence, namely HUMGS0012279 (dbj|C02407), thus indicating 
that the two EST sequences found during the initial TBLASTN search belong to the 
same gene. 

30 The complete sequence of the gene was assembled from the two ESTs 

using commercially available sequence assembly program SeqManII (DNASTAR Inc., 
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sites and apparently antagonize Af;;c-mediated activation of the same set of target 
genes. 

During tissue development a shift from Myc:Max to MAD:Max 
complexes occurs coincidentally with the switch from cell proliferation to 
5 differentiation. The switch in heterocomplexes is thought to reflect a switch from 

activation to repression of common genes leading to cessation of proliferation, exiting 
the cell cycle and the beginning of cell differentiation. In differentiating neurons, 
primary keratinocytes, myeloid cell lines and probably other tissues the expression of 
different MAD:Max complexes appear in sequential order during the transition from 

1 0 cell proliferation to differentiation. The MAD3 expression appears first and it is 

restricted to proliferating cells prior to differentiation where it is co-expressed with 
two different member of Myc family, c-Myc or N-Myc. Mxi1 transcripts are detected 
in proliferating and differentiating cells whereas MAD1 and MAD4 were confined to 
post-mitotic cells. Because Myc expression is not always downregulated in 

1 5 post-mitotic cells, co-expression of Myc and MAD genes may result in competition for 
Max heterodimers thus providing promoting or inhibitory effect on cell proliferation. 

The gene expression patterns, along with ability of Mad proteins to 
suppress Myc-dependent transformation, are consistent with a potential function of 
Mad genes as tumor suppressors. This view is supported by the fact that allelic loss 

20 and mutations were detected at the Mxi1 locus in prostate cancers (Eagle et al., 1995 
Nat Genet 9:249-55). Cloning of the murine proteins Mad3 and Mad4 as well as their 
relation to Max signaling network was described by Hurlin (Hurlin PJ, et al., 1995, 
EMBO J. 14:5646-59) and Queva (Queva et al. 1998 Oncogene 16:967-977). Human 
orthologs of Mad4, Mad1 and Mxi1 are known. 

25 In this example, the discovery of an unknown human ortholog of Mad3 

protein found "in silico" by means of phylogenetic analysis of known mouse and 
human members of the Mad gene family and database searches is described. Since the 
function of murine MadS as a Max- interacting transcriptional repressor of 
Afyc-induced neoplastic transformation is well described, we can assign the same 

30 function to its human ortholog. 
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protein (sp|Pl 5533 |RPT1) (Figure 13). Analysis of regulatory functions of RPTl in 
the mouse reveals that this gene functions as a repressor of the interleukin 2 receptor 
(IL-2R) gene. When the RPT1 gene is knocked out, the regulatory effect is 
manifested as a block of the apoptotic pathway in T lymphocytes resulting in an 
5 accumulation of T lymphocytes in blood. This result is consistent with aberrations 
observed in CLL, namely abnormal accumulation of B-cells in the blood (Trentin L. 
et al., 1997, Leuk. Lymphoma 27:35-42) and mutations in the human RPT1 gene play 
a role in development of CLL. 

6.1.3 EXAMPLE: A DISCOVERY OF A HUMAN ORTHOLOG OF THE 

10 MURINE MAX-INTERACTING TRANSCRIPTIONAL 

REPRESSOR : 

The family of Myc proto-oncogenes encodes a set of transcription 
factors implicated in regulation of cell proliferation, differentiation, transformation 
and apoptosis. C-Myc null mutations result in retarded growth and development of 

15 mouse embryos and are lethal by 9-10 day of gestation. In contrast, overexpression of 
Myc genes inhibits cell differentiation and leads to neoplastic transformation. 
Moreover, deregulation of Myc expression by retroviral transduction, chromosomal 
translocation or gene amplification is linked to a broad range of naturally occurring 
tumors in humans and other species. 

20 Another protein, called Max, is an obligatory heterodimeric partner for 

Myc proteins in mediating their function as activators of transcription during cell cycle 
progression, neoplastic transformation and programmed cell death (apoptosis). In 
order to make an active transcription factor the Myc proteins must form heterodimers 
with Max protein. This interaction with Max protein is necessary for specific binding 

25 of Myc with CACGTG box (or related E-boxes) on DNA and for activation of 
promoters located proximal to the binding sites. 

Besides the Myc family of transcription factors, the Max protein forms 
complexes with another family of so-called MAD proteins: Mxi1, MAD1, MAD3 and 
MAD4. Whereas Myc:Max complexes activate transcription, MAD:Max complexes 

30 work in an opposite way repressing the transcription through the same E-box binding 
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The two closest human sequences, AA481214 and W51957, are depicted in Figure 
14A. To determine whether the identified human sequences were orthologs or 
paralogs to the gi|1132541 gene of C. elegans, a gene tree (Saito and Nei, 1997, 
Molecular Biol. Evol. 4:406-425) was computed. The gene tree was generated using 
5 homologous genes identified with a BLASTP search against NCBI non-redundant 
database, using the human EST AA481214 sequence as a query. The resulting tree 
indicates that the identified human EST AA481214 represents a true ortholog of the 
C. elegans gene gi|1132541 (Figure 14B). The nucleotide sequence of the death 
domain protein is shown in Figure 14C, as well as the deduced amino acid sequence 
1 0 presented in Figure 14D. 

6.1.2 APOPTOSIS GENE DISCOVERY METHOD 
As a first step in identifying a novel gene involved in apoptosis, a 
comprehensive set of articles describing the system of apoptosis/programmed cell 

1 5 death in different species was compiled using the keyword "apoptosis". By analyzing 
the articles, information on regulatory pathways characterizing this system in different 
species, i.e., C. elegans, mouse, fruit fly, chicken, and human, was extracted. The 
regulatory information was stored as a collection of schemes produced in PowerPoint 
(Microsoft). Figure 4 shows a set of keywords defining proteins involved in apoptosis 

20 pathways. The keywords were used to generate a specialized sequence database, 
referred to as Apoptosis3, utilizing the PsiRetriever program for extraction of 
proteins from the all-inclusive non-redundant GenBank database (NCBI). Using 
program PsiRetriever, sequences from the non-redundant (NCBI) database of protein 
sequences, were retrieved and stored as a FASTA file. The FASTA file was then 

25 converted into binary blast database using program FORMATDB from the BLAST 
suite of programs. 

Genomic and cDNA sequences located in the region of human 
chromosome 13q were compared with the Apoptosis3 database using BLASTALL 
program from BLAST program complex. This region of the human genome is 

30 associated with Chronic Lymphocytic Leukemia (CLL). The comparison revealed 
significant similarity between a CLL region open reading frame and the mouse RPT1 
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groupings of proteins: (i) proteins known to be tumor suppressors, and (ii) proteins 
implicated in apoptosis in animals were developed. 



6.1 APOPTOSIS GENE DISCOVERY METHOD 
5 Identification of a putative apoptosis-related human gene began with 

an identification of all genes in C. elegans that contained either a POZ or kelch 
domain. A subset of these genes is shown in Figure 13. Hidden Markov Models 
(HMM) for the POZ and Kelch domains were built as follows. Starting with POZ and 
kelch sequences from the Drosophila kelch protein (gi 1 577275) homologs were 

10 identified in other protein sequences using the BLASTP program. The resulting 

sequences showing significant similarity (e-value less than 0.001) were aligned using 
CLUSTALW program and the alignments were used to build Hidden Markov Models 
with HMMER-2 package (Krogh et al., 1995, :http://hmmer.wustl.edu/). A computer 
printout listing of HMM models of tumor suppressors appears as a Microfiche H to 

1 5 the present specification. (See, http://hmmer.wustl.edu; Chapter 2, which is 

incorporated by reference herein in its entirety, for a detailed description of HMM 
models) 

The resulting models were used to search through a database collection 
of C. elegans protein sequences. The domain structures of proteins having either a 

20 POZ or kelch domain were identified using existing collections of protein domains 
(e.g., see http://blocks.fhcrc.org/blocks/blocks release.html, http://coot.embl- 
heidelberg.de/SMART/, http://www.motif.genome.ad.jp/). One of the unannotated 
protein-coding genes of C. elegans (corresponding protein accession number 
gi|1132541, see Figure 11) appeared to include a POZ domain, death domain, kinase 

25 domain, and heat repeat. A death domain is characteristic for the apoptosis system 
and a kinase domain indicates that the protein is likely to participate in 
phosphorylation of other proteins. The presence of these particular domains suggests 
that this protein is serving as a regulatory protein. 

Using the protein sequence of gi|1132541, the database of human EST 

30 sequences was searched and a number of partial human cDNA sequences representing 
potential human orthologs or paralogs of the C. elegans gi|1132541 were identified. 
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In another embodiment, polymerase chain reaction (PGR) can be used 
to amplify the desired sequence from a genomic or cDNA library. To isolate 
orthologous or paralogous genes from other species, one synthesizes several different 
degenerate primers, for use in PCR reactions. In a preferred aspect, the 
5 oligonucleotide primers represent at least part of the gene comprising known ortholog 
or paralog sequences of different species. It is also possible to vary the stringency of 
hybridization conditions used in priming the PCR reactions, to allow for greater or 
lesser degrees of nucleotide sequence similarity between the known nucleotide 
sequences and the nucleic acid homolog being isolated. 

1 0 Synthetic oligonucleotides may be utilized as primers to amplify by 

PCR sequences from a source (RNA or DNA), preferably a cDNA library, of potential 
interest. PCR can be carried out, e.g., by use of a Perkin-Elmer Cetus thermal cycler 
and a thermostable polymerase, e.g., Amplitaq (Perkin-Elmer). The nucleic acids 
being amplified can include mRNA or cDNA or genomic DNA from any eukaryotic 

1 5 species. After successful amplification of a segment of the gene of interest, that 

segment may be molecularly cloned and sequenced, and utilized as a probe to isolate a 
complete cDNA or genomic clone. 

Once identified and isolated the gene of interest can then be inserted 
into an appropriate cloning vector for amplification and/or expression in a host. A 

20 large number of vector-host systems known in the art may be used. Possible vectors 
include, but are not limited to, plasmids and modified viruses, but the vector system 
must be compatible with the host cell used. Such vectors include, but are not limited 
to, bacteriophages such as lambda derivatives, or plasmids such as pBR322 or pUC 
plasmid derivatives or the Bluescript vector (Stratagene). The insertion into a cloning 

25 vector can, for example, be accomplished by ligating the DNA fragment into a cloning 
vector which has complementary cohesive termini. 



6. EXAMPLE: USE OF SPECIALIZED DATABASES 

FOR IDENTIFICATION OF NOVEL GENES 
To test the method of using databases for gene discovery, protein 
30 sequence and domain/motif databases specific to two overlapping functional 
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carried out in the same solution with the following modifications: 0.02% PVP, 0.02% 
Ficoll, 0.2% BSA, 100 mg/ml salmon sperm DNA, 10% (wt/vol) dextran sulfate, and 
5-20 X 10^6 cpm ^32P-labeled probe is used. Filters are incubated in hybridization 
mixture for 18-20 h at 40°C, and then washed for 1.5 h at 55°C in a solution 
5 containing 2X SSC, 25 mM Tris-HCl (pH 7.4), 5 mM EDTA, and 0.1% SDS. The 
wash solution is replaced with fresh solution and incubated an additional 1.5 h at 
60°C. Filters are blotted dry and exposed for autoradiography. If necessary, filters 
are washed for a third time at 65-68 °C and reexposed to film. Other conditions of 
low stringency which may be used are well known in the art (e.g., as employed for 

10 cross species hybridizations). 

In another specific embodiment, a nucleic acid which is hybridizable to 
a nucleic acid under conditions of moderate stringency is provided. For example, but 
not by way of limitation, procedures using such conditions of moderate stringency are 
as follows: filters containing DNA are pretreated for 6 h at 55°C in a solution 

15 containing 6X SSC, 5X Denhardt's solution, 0.5% SDS and 100 mg/ml denatured 

salmon sperm DNA. Hybridizations are carried out in the same solution and 5-20 X 
10^6 cpm ^32P-labeled probe is used. Filters are incubated in the hybridization mixture 
for 18-20 h at 55°C, and then washed twice for 30 minutes at 60°C in a solution 
containing 1X SSC and 0.1% SDS. Filters are blotted dry and exposed for 

20 autoradiography. Other conditions of moderate stringency which may be used are 
well-known in the art. Washing of filters is done at 37°C for 1 h in a solution 
containing 2X SSC, 0.1% SDS. 

For expression cloning (a technique commonly used in the art), an 
expression library is constructed. For example, mRNA is isolated from the cell type 

25 of interest, cDNA is made and ligated into an expression vector (e.g., a bacteriophage 
derivative) such that it is capable of being expressed by a host cell (e.g., a bacterium) 
into which it is then introduced. Various screening assays can then be used to select 
for the expressed gene product of interest based on the physical, chemical, or 
immunological properties of its expressed product. Such properties can be deduced 

30 from the properties of the corresponding orthologs from other species. 
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knock out of gene A which can be any one of the following: inhibition of gene B, 
induction/activation of gene B, or none. In addition to the "electronic knock out", an 
"electronic knock in" of a particular gene can be simulated. In such a computer 
simulation, the artificial addition of a gene and its effect on a regulatory system may 
5 be analyzed. 

5.6. IDENTIFICATION AND ISOLATION OF NOVEL GENES 
The present invention relates to identification of novel genes, i.e., 
missing orthologs or paralogs, and the isolation of nucleic acid molecules encoding 

10 novel genes. In a specific embodiment, a nucleic acid molecule encoding a missing 
ortholog or paralog can be isolated using procedures well known to those skilled in 
the art (See, for example, Sambrook et al., 1989, Molecular Cloning, A Laboratory 
Manual, 2d Ed., Cold Spring Harbor Laboratory Press, Cold Spring Harbor, New 
York; Glover, D.M. (ed.), 1985, DNA Cloning: A Practical Approach, MRL Press, 

1 5 Ltd., Oxford, U.K. Vol. I, II). 

For example, genomic and/or cDNA libraries may be screened with 
labeled DNA fragments derived from a known ortholog or paralog from a specific 
species and hybridized to the genomic or cDNA libraries generated from a different 
species. For cross species hybridization, low stringency conditions are preferred. For 

20 same species hybridization, moderately stringent conditions are preferred. Any 
eukaryotic cell potentially can serve as the nucleic acid source for the molecular 
cloning of the gene of interest. The DNA may be obtained by standard procedures 
known in the art from cloned DNA (e.g., a DNA "library"), by cDNA cloning, or by 
the cloning of genomic DNA, or fragments thereof, purified from the desired cell. 

25 By way of example and not limitation, procedures using conditions of 

low stringency are as follows (see also Shilo and Weinberg, 1981, Proc. Natl. Acad. 
Sci. USA 78:6789-6792; and Sambrook et al. 1989, Molecular Cloning, A Laboratory 
Manual, 2d Ed., Cold Spring Harbor Laboratory Press, Cold Spring Harbor, New 
York): Filters containing DNA are pretreated for 6 h at 40°C in a solution containing 

30 35% formamide, 5X SSC, 50 mM Tris-HCl (pH 7.5), 5 mM EDTA, 0.1% PVP, 0.1% 
Ficoll, 1% BSA, and 500 mg/ml denatured salmon sperm DNA. Hybridizations are 
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while the "object" substance can be in either active, or inactive, state depending on the 
action type. For example, the action "dephosphorylation" requires an active 
phosphatase ("subject" substance) and a phosphorylated substrate protein ("object" 
substance) in phosphorylated form. If both conditions are satisfied, the action is 
5 recorded as in progress. At termination, the substances must change their states as 
specified by the action. On each following "quantum" of time, the simulation 
proceeds in the same way while maintaining the "bookkeeping" of the remaining time 
for each action and the remaining lifespan of each substance. The simulation stops 
when there are no more active actions available. The program allows editing of the 

10 properties of the objects, changing the scale and focus of the visualized simulation, 
and experimenting with the systems output. 

In a specific embodiment of the invention a "knock out" of a gene can 
be simulated to model the regulatory system that normally includes hypothetical 
gene A. One of the typical questions related to the gene knock out is how does the 

1 5 knock out affect a biological pathway of interest. A hypothetical example of 

evaluating the impact of a knock out of hypothetical gene A on the expression of a 
hypothetical gene B is shown in Figure 12. The answer to such a question could be 
"gene B will be inhibited" or "gene B will be induced" or "no effect". 

In the practice of the present invention, a simple algorithm involving 

20 multiplication of gene interaction "signs" along the shortest pathway between the 
genes can be used to determine the outcome. The algorithm involves the following 
steps: (i) identification of the shortest non-oriented pathway connecting genes A and 
B involved in a pathway of interest; (ii) assigning sign "-" to gene A since it is 
knocked out and taking this sign as the initial sign value; (iii) moving along the 

25 shortest pathway between genes A and B, multiplying the current value of the sign 
with the sign of the next arc, where "-" stands for inhibition, "+" stands for induction 
or activation, and "0" stands for the lack of interaction between two proteins in the 
specified direction; (iv) determining if the final result of multiplication is "0", if so 
eliminating the zero arc and trying to find the shortest oriented bypass pathway 

30 between A and B in the remaining network; otherwise stop. The final value of the 
sign at the moment of arriving at vertex B would indicate the most likely effect of the 



BNSDOCIO <WO_0063e87Al J_» 



wo 00/63687 



PCT/USOO/10302 



39 

The present invention encompasses the nucleic acid molecule of Figure 
14C, comprising the sequence of EST AA481214 and proteins encoded by said 
nucleic acid molecule. The invention also relates to nucleic acid molecules capable of 
hybridizing to such a nucleic acid molecule under conditions of high stringency. By 
5 way of example and not limitation, procedures using such conditions of high 

stringency are as follows: Prehybridization of filters containing DNA is carried out 
for 8 hours to overnight at 65°C in buffer composed of 6x SSC, 50mM Tris-HCl 
(pH7.5), 1mM EDTA, 0.02% PVP, 0.02% Ficoll, 0.02% BSA and 500 mg/ml 
denatured salmon sperm DNA. Filters are hybridized for 48 hours at 65°C in 
10 prehybridization mixture containing 100 mg/ml denatured salmon sperm DNA and 5- 
20 X 10^6 cpm of ^32P-labeled probe. Washing of filters is done at 37°C for 1 hour in a 
solution containing 2x SSC, 0.01% PVP, 0.01% Ficoll and 0.01% BSA. This is 
followed by a wash in 0.1x SSC at 50°C for 45 minutes before autoradiography. 
Other conditions of high stringency which may be used are well known in the art. 

15 

5.5.3. SIMULATION OF REGULATORY CASCADES 
In an embodiment of the invention, an interactive graphical program is 
utilized for visualizing the scheme of regulatory relationships, "current" states of the 
substances, and active and inactive actions between pairs of substances. Such a 

20 program can be utilized for identification of genes which are associated with a specific 
disease. Currently, disease associated genes are discovered through positional cloning 
methods which combine methods of genetics and physical mapping with mutational 
analysis. The present invention provides a novel method for discovering disease 
associated genes. For simulating regulatory cascades, it is assumed that the time in a 

25 simulated regulatory system advances in discrete "quanta," or periods of time. The 
"state of substances" of the system for each discrete period of time is computed by: 
creating a set of substance objects, where a set of interactions between each created 
substance object is known, an initial state is specified. The time is initially set to zero. 
All defined actions are observed to confirm that the substances corresponding to the 

30 actions (i) exist, and (ii) are in the right initial states. Action is defined by a pair of 
substances that are in suitable states. The "subject" substance is in the inactive state, 
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including for example yeast and/or nematode genes, that bear a significant similarity 
to the gene of interest or a specified domain of the corresponding protein are 
collected. Third, the identified genes are in turn subjected to a "domain analysis" to 
establish protein motifs which might suggest a function of these genes using, for 
5 example, HMMER software. Fourth, the selected genes are in turn used for database 
searches in EST databases (dbEST) and/or a non-redundant (nr) database to identify 
unknown genes that are potentially orthologous to the selected yeast and nematode 
genes. Once identified, ESTs having different tumor suppressor domains may be 
linked using multiple PCR primers. Using routine cloning techniques, well known to 
10 those of skill in the art, a full length cDNA representing the gene of interest can be 
obtained. 

Once new genes are identified by domain/motif analysis experimental 
searches may be carried out to isolate complete coding sequences and evaluate their 
tissue- and disease-specific expression patterns. In parallel their position with respect 

1 5 to regulatory networks can be identified as described below. 

In a specific embodiment of the invention, an apoptosis related human 
gene was identified using the method described above. As a first step C. elegans 
genes containing either POZ or Kelch domains were identified. A Hidden Markov 
Model was developed using POZ and Kelch sequences from the Drosophila Kelch 

20 protein and any identified homologs. The resulting Hidden Markov Model was used 
to search through the collection of C. elegans protein sequences. One of the identified 
C. elegans genes contained a POZ domain, death domain, kinase domain and heat 
repeat. The presence of both a death domain and a kinase domain suggested that the 
protein functions as a regulatory protein. 

25 A human EST database was searched using the protein sequence of the 

identified C. elegans gene and two sequences were identified (Figure 14A). A gene 
tree was computed to determine whether the identified human sequences were 
orthologs of the C. elegans gene. As depicted in Figure 14B, the human EST 
AA481214 appears to be a true ortholog of the C. elegans gene. Figure 14C presents 

30 the nucleotide sequence of the identified death domain gene. Figure 14D presents the 
amino acid sequence of the death domain protein. 
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orthologous proteins with pairs of orthologous domains. After this correction, 
homologous networks are compared as described above. 

Figure 10 is a diagram representing a hypothetical example of defining 
homologous protein networks in two different species using protein motifs, the 
5 diagram showing only two hypothetical proteins (lane 2) for species A and three 
hypothetical proteins (lanes 1, 3, and 4) for species B. Protein 1 in both species has 
motifs α and β, protein 2 has motifs δ, ε, and ζ, and proteins 3 and 4 have motifs δ 
and ζ, and ε, respectively. The motif analysis indicates that proteins 3 and 4 in 
species B may collectively perform the same function as protein 2 in species A. 

10 5.5.2 GENE DISCOVERY BASED ON PROTEIN 

MOTIF/DOMAIN SEARCHES 

The present invention provides yet another method for identifying 
genes that are homologous and perform the same or an analogous function in different 
species. The method of the invention comprises the following steps: (i) creating a 

1 5 database of sequences which comprise a motif or domain composition of a gene of 

interest using, for example, HMMER software; and (ii) searching additional databases 
for expressed sequence tags (ESTs) containing the domains and motifs characteristic 
for the gene of interest with HMMs of domains and motifs identified in step (i). In yet 
another embodiment of the invention, sequences may be searched which correspond 

20 to nucleotide sequences in an EST database or other cDNA databases using a program 
such as BLAST and retrieving the identified sequences. In an optional step, for each 
EST identified, sequence databases can be searched for overlapping sequences for the 
purpose of assembling longer overlapping stretches of DNA. Once identified, the 
ESTs can be used to isolate full length nucleotide sequences comprising the gene of 

25 interest using methods such as those described in Section 5.4, infra. 

The general flowchart scheme for gene discovery analysis based on 
motif/domain search is shown in Figure 11. In a specific embodiment of the 
invention, the method referred to as the "phylogenetic reflection technique" comprises, 
first, defining the motif or domain composition of a gene of interest involved in a 

30 biological system of interest. Second, protein-coding genes from other species. 
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In a specific embodiment of the invention a set of regulatory networks 
from different species, relating to the same biological system, apoptosis, for example, 
can be analyzed and visualized utilizing the following methods: (i) for each species 
functional information is collected relating to apoptosis; (ii) using the functional 
5 information, regulatory networks for each species comprised of interacting proteins 
and/or the genes involved in apoptosis are generated; (iii) the sequences of the 
interacting proteins and genes of each of the regulatory network are compared and for 
sequences showing similarity above a predetermined threshold range; and (iv) 
distinguishing between orthologs and paralogs using the methods set forth above. 

1 0 An analysis similar to that performed using subtrees of sequences may 

be applied to classify protein functions as orthologous or paralogous actions. A 
"generalized" regulatory network may be represented as a network wherein a substance 
as it occurs in a particular species is substituted with a cluster (i.e., subtree) of 
orthologous substances among species. In the final step of the analysis the clusters 

1 5 within each species are compared to one another, to identify missing genes. 

Figure 1 1 depicts the regulatory relationships among hypothetical 
proteins (denoted with Arabic numerals) of hypothetical species A and B. As 
indicated in Figure 11 A, an overlay of regulatory data for two species overlaps, but 
not completely. As indicated, protein 5 is known only for species B while protein 3 is 

20 known only for species A. The proteins in different species denoted with the same 
numeral are considered orthologous. As indicated, the regulatory relationships 
between a pair of proteins can be of three different kinds. Figure 9B, 9C, and 9D 
represent Boolean operations, OR, AND, and XOR, as arcs of the two regulatory 
relationships depicted in Figure 9A, the same operations being applicable to the set of 

25 vertices of the two regulatory relationships. 

In some instances, orthologous networks in two distantly related taxa may have the 
same domains but arrangement of the domains between the related taxa may be 
different. In such a case, a one-to-one correspondence between orthologous proteins 
in closely related species has to be substituted with a one-to-many relationship among 

30 domains comprised within the proteins. For this purpose, a similarity object may be 
defined operating on pairs of motifs/domains in two proteins, and substitute pairs of 
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other species. The identified sequences are compared and for each pair of sequences 
showing similarity above a certain threshold, a similarity object is generated. A 
similarity object is generated if two sequences, nucleotide or amino acid, show 
significant similarity in database searches (p value < 0.001). The object retains the 
5 following information: (i) reference to similar substances i.e., genes or proteins; 

(ii) significance of the similarity, similarity score and percent of identity; and 

(iii) coordinates of the similarity region within two compared sequences. 
"Orthology objects" constitute a subset of "similarity objects" which satisfies one 
additional requirement, i.e., that two similar sequences should be identified as 

10 orthologs by the tree-based algorithm described above. In identifying orthologs, if 
gene A is orthologous to gene B, and gene B is orthologous to gene C, gene A is 
necessarily orthologous to gene C. 

In a specific embodiment of the invention, for each species under 
analysis, orthologous proteins or genes are identified. In a further embodiment of the 

1 5 invention, small orthologous molecules participating in a regulatory network for two 
or more species may also be identified. Where proteins, genes, or molecules are 
orthologs, the action of the protein, gene or molecule between species may be 
interchangeable. If more than two species are involved in the analysis, subtrees of 
orthologous substances and subtrees of orthologous actions are identified. 

20 Once orthologous genes, proteins or molecules are identified in two or 

more species, by forming a reconciled tree, for example, a set of orthologous or 
paralogous regulatory networks can be analyzed and visualized using graph theory 
where arcs represent actions and vertices represent substances. Thus, the method of 
the invention may further comprise the following steps: (i) superimposing the 

25 orthologous regulatory networks from two or more species and searching for the 
actions (arcs) and substances (vertices) in the homologous networks that are 
represented in some taxa but absent in others; (ii) superimposing paralogous 
regulatory networks from the same taxa and searching for paralogous genes that are 
missing in some taxa; and (iii) computing a general regulatory network that 

30 summarizes common regulatory sequence relationships known for more than one 
species. 
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multigene families through comparisons of regulatory networks for different species, 
searching expressed sequence tag (EST) databases, and simulation of regulatory 
cascades. 

5.5.1. GENE DISCOVERY THROUGH ANALYSIS 

5 OF REGULATORY NETWORKS 

The present invention provides a method for identifying undiscovered 
genes through comparisons of regulatory networks for different species where 
functionally similar regulatory systems are conserved. The amount of information 
available concerning regulatory genes and/or proteins in different organisms and their 

1 0 functional relationships allows one to reconstruct and compare regulatory networks. 
Since in most cases, the knowledge of all genes involved in almost any particular 
regulatory system is incomplete, a comparison of homologous networks within the 
same organism and between different species permits the identification of genes 
absent in a system under comparison. 

1 5 The identified genes, being part of a regulatory network, are implicated 

as potentially contributing to a phenotype of a disease associated with the system 
under analysis. Using the methods of the present invention these putative disease 
genes can be cloned, mapped and analyzed for mutations directly, thereby omitting 
the expensive and time-consuming steps of positional cloning and sequencing of 

20 genomic regions. Gene discovery by analysis of regulatory networks is outlined in 
Figure 8. The analysis is initiated starting with a biological system (e.g., signaling 
pathway of genes involved in Bcl-2-regulated apoptosis in lymphocytes), a single 
gene (e.g., Bcl-2) or a gene family (e.g., caspases). 

Initially, a specialized database is generated for comparison of 

25 regulatory networks between different species. For example, starting with a single 
candidate gene in a single species, a typical iteration in this process begins with 
identification of all known proteins and genes that are upstream and downstream with 
respect to it in regulatory hierarchies and the reconstruction of a network of 
interacting genes and proteins. Next, for each protein, a set of key domains and motifs 

30 is identified and this information is used to search for related proteins in humans and 
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conducted to obtain overlapping sequences in dbEST. The search for overlapping 
sequences was performed using the program Iterate with EST zs77e55.r1 
(gb|AA278224) as the search query. The search identified a single overlapping 
sequence. The search for overlapping sequences was performed using program Iterate 
5 with EST zs77e55.r1 (gb|AA278224) serving as a query. The search returned a single 
overlapping sequence, namely HUMGS0012279 (dbj|C02407), thus showing that the 
two EST sequences found during the initial TBLASTN search belong to the same 
gene. The complete sequence of the gene was assembled from the two ESTs using 
commercially available sequence assembly program SeqManII (DNASTAR Inc., WI). 

1 0 The nucleotide sequence of the human Mad3 gene is presented in Figure 17B. The 
deduced amino acid sequence of which is presented in Figure 17C. The complete 
DNA sequence is also shown. 

The present invention relates to nucleic acid molecules encoding the 
human Mad3 protein shown in Figure 17C. The invention also relates to nucleic acid 

15 molecules that hybridize to the nucleic acid molecule of Figure 17B under conditions 
of high stringency and encode a Mad3 protein. By way of example and not limitation, 
procedures using such conditions of high stringency are as follows: Prehybridization 
of filters containing DNA is carried out for 8 hours to overnight at 65°C in buffer 
composed of 6x SSC, 50mM Tris-HCl (pH7.5), 1mM EDTA, 0.02% PVP, 0.02% 

20 Ficoll, 0.02% BSA and 500 mg/ml denatured salmon sperm DNA. Filters are 

hybridized for 48 hours at 65°C in prehybridization mixture containing 100 mg/ml 
denatured salmon sperm DNA and 5-20 x 10^6 cpm of ^32P-labeled probe. Washing of 
filters is done at 37°C for 1 hour in a solution containing 2x SSC, 0.01% PVP, 0.01% 
Ficoll and 0.01% BSA. This is followed by a wash in 0.1x SSC at 50°C for 45 

25 minutes before autoradiography. Other conditions of high stringency which may be 
used are well known in the art. 

5.5. SIMULATION AND HYPOTHESIS TESTING 
The simulation and hypothesis testing methods of the invention, 
described in the subsections below, utilize specialized databases of gene/protein 
30 structures and interactions for identifying potentially undiscovered members of 
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Then define similarity measure, o, between Sgi and S,j in the foUowng way: 
o(-S'gA>) = 0 if \^8i\ " I (^gii I ' (S^) " Sg„ and 

The support of tree clusters by data can be measured using the bootstrap technique 
5 described in Felsenstein (1985, Evolution 39:783-791). 

In an embodiment of the invention, the human antiquitin gene was 
identified using phylogenetic analysis. The aldehyde dehydrogenase gene family in 
humans can be subdivided into at least ten ancient subtrees characterized by different 
functions of corresponding proteins. These genes probably arose from a series of gene 

10 duplications of an ancestral gene which took place before the divergence of a common 
ancestor of Eukaryotes and Eubacteria. 

The aldehyde dehydrogenase gene cluster is highlighted in Figure 6 
which shows the original tree of ALDH sequences, the circled area indicating a 
sequence cluster where bacterial (Bacillus subtilis), plant (Brassica napus), and 

1 5 nematode (Caenorhabditis elegans) ortholog is present, but a human ortholog is not 
known. A random screening of cDNA libraries showed that a human ortholog, 
referred to as antiquitin, does exist. Figure 7 shows the same gene tree as in Figure 6 
with an additional human protein referred to as antiquitin present in the tree. 

In yet another embodiment of the invention, a human ortholog of the 

20 murine Max-interacting transcriptional repressor Mad3 was identified through 

phylogenetic analysis of a gene family. The gene tree was constructed as follows. 
The protein sequences of known members of the Mad gene family were extracted 
from GenBank database. The extracted sequences were aligned using multiple 
alignment program CLUSTALW running on Sun SPARC station. Redundant and 

25 non-homologous sequences as well as distant homologs from S. cerevisiae, C. 

elegans, D. melanogaster etc. were removed from the alignment. The refined set of 
sequences were realigned with CLUSTALW and a gene tree as presented in Figure 
18A was computed. To identify a human ortholog of the Mad3 protein, a human 
dbEST at NCBI was searched with program TBLASTN using mouse Mad3 protein 

30 sequences as a query. Two highly homologous ESTs were identified and are 
presented in Figure 17A. To obtain a complete coding sequence a search was 
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subtrees of orthologs in a gene tree, and then comparing the subtree in the gene tree 
with a species tree. A missing gene appears as a branch present in the species tree but 
absent in the gene tree. The algorithm for defining an orthologous gene subtree and 
predicting the undiscovered, or lost in evolution, genes is as follows: 



tree-making methods from a set of properly aligned homologous genes {1, 2, ..., s}, 
such that one or more homologous genes from every species corresponds to pending 
vertices of Tg. Each gene is labeled with the species it comes from (1, ..., s), adding 
subscripts to distinguish homologous genes from the same species whenever it is 

10 necessary. Let Ts be the true species tree (tree correctly reflecting speciation events 
which we assume to be known) for species {1, 2, ..., s}. Due to the biological 
meaning of Ts, each species in this tree is represented only once. It is assumed that 
both Ts and Tg are binary, although it is straightforward to extend the algorithm 
described here to the case of multifurcated trees. 

15 Algorithm 

A1. For each pair of interior nodes from trees Tg and Ts, compute similarity 



A3. Save Sgj as a new subtree of orthologs, save {SgJ - {S,j} as a set of species 



A4. Eliminate Sgj from Tg: Tg := Tg \ Sgj. 
A5. Continue A2 - A4 while Tg is non-empty. 
The following definitions apply: 

Let Sgi be an ith subtree of Tg (corresponding to the ith interior node), 

25 correspondingly, let Ssj be the jth subtree of tree Ts. 

Let {Sgi} stand for an unordered set of species represented in Sgi such that each 
species is represented exactly once, and let |{Sgi}| and |Sgi| be the number of 
entries in {Sgi} and the number of pending vertices in Sgi, respectively. Define by 
Ssj(Sgi) the unique subtree of Ssj that has leaves labeled exclusively with species from 

30 {Sgi}, so that each element of {Sgi} is used, i.e., the unique subtree 
obtained by eliminating from Ssj all species that are not present in {Sgi}. 



5 



Let Tg be the most likely gene tree identified with one of consistent 



A2. 




20 



that are likely to have gene of this kind (or lost it in evolution). 
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By applying phylogenetic analysis, i.e., reconstruction of gene trees of 
gene/protein sequences, one can predict the existence of undiscovered genes in 
humans and other species in addition to identifying the function of a gene. Such a 
technique is a significantly more powerful tool for identification of new genes than 
5 mere sequence comparisons. 

Methods of computing gene trees from a set of aligned sequences 
include the : (i) heuristic method based on an optimization principle which is not 
directly motivated by a probability model (Fitch, 1974, J. Mol. Evol. 3:263-268), (ii) 
the maximum likelihood method (Goldman, 1990, Syst. Zool. 30:345-361; Yang et 

10 al., 1995, Syst. Biol. 44:384-399; Felsenstein, J., 1996, Methods Enzymol. 266:418- 
427); and (iii) the distance matrix tree making method (Saito, N. and Nei, M., 1987, 
Mol. Biol. Evol. 4:406-425). Since the data analyses of orthologs and paralogs often 
involve very distantly related sequences, the maximum likelihood method is 
preferably used for small data sets and the distance-matrix method in other instances. 

15 To construct a reconciled tree according to the invention, the first step 

comprises a search for homologs in a publicly or privately available database such as, 
for example, GenBank, Incyte, binary BLAST databases, Swiss Prot and NCBI 
databases. Following the identification of homologous sequences a global alignment 
is performed using, for example, the CLUSTALW program. From the sequence 

20 alignment a gene tree is constructed using, for example, the computer program 

CLUSTALW which utilizes the neighbor-joining method of Saito and Nei (1987, 
Mol. Biol. Evol. 4:406-425). Construction of a species tree is then retrieved from, for 
example, the following web site: 
http://www3.ncbi.nlm.nih.gov/Taxonomy/tax.html. 

25 The species tree and gene tree are given as input into the algorithm 

described below, which integrates both trees into a reconciled tree. Agreement 
between the gene tree and the corresponding species tree for any given set of 
sequences indicates the identification of orthologs. In contrast, disagreement between 
the species and gene tree suggest a gene duplication that resulted in the formation of a 

30 paralog. Thus, through generation of a reconciled tree one can identify orthologs 
present in one species but missing in another. These can be deduced by forming 



BNSDOCID: <WO_00e3e87A1_L> 



WO 00/63687 



PCT/US00/10302 



29 

amino acid or nucleotide replacements per site or in terms of millions of years 
(absolute geological time). In the former case, the average replacement rate in the 
majority of the published trees varies among tree branches, and the root-to-tip 
distances are different for different present day sequences. In the latter case, all root- 

5 to-tip distances are equal and the height of each interior node of the tree corresponds 
to the absolute geological time passed since the gene duplication corresponding to the 
interior node took place. 

If a gene is unique, i.e., represented with a single copy per genome 
rather than being a member of a family of similar genes, the correct gene tree 

1 0 depicting the origin of this gene in a few different species is identical to the species 

tree. In many instances, a single ancestral gene has been duplicated repeatedly during 
evolution to form a multigene family. A gene tree is constructed from a gene as it 
occurs in several species and reflects both speciation events and gene duplications 
within the same genome. Two homologous genes taken from different species that 

15 originated from the nearest common ancestor by speciation are referred to as 

orthologs, while any two genes that originated from the common ancestor via a series 
of events involving intragenomic duplications, or conversions, are called paralogs. 
The terms "ortholog" and "paralog" are applied to both nucleic acid and proteins 
herein. 

20 If gene deletions are forbidden and all genes for all species represented 

in the tree are known, the gene tree can be reconfigured to recapitulate the species 
tree, such that each subtree contains only orthologous genes. This tree is referred to as 
a reconciled tree and is shown in Figure 5. Imperfect gene trees which contain 
incorrect or partial species subtrees can be used to build reconciled trees that indicate 

25 events of speciation, gene loss, and gene duplication. 

Orthologs from different species in gene trees are usually clustered 
together, so that if all the existing homologous genes from different species were 
known, the same relationship of species would be recapitulated in each cluster of 
orthologous genes. Since in reality a considerable number of genes are not yet 

30 identified, the real gene trees contain incomplete clusters of orthologs that can be used 
for identification of the missing genes. 
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lymphocyte apoptosis. This result indicates that the identified human Rptl homology 
may represent the gene in which genetic defects lead to CLL. 

The amino acid sequence of the human Rptl gene is presented in 
Figure 15. The present invention relates to nucleic acid molecules encoding the 
5 human Rptl protein shown in Figure 15. The invention also relates to nucleic acid 
molecules capable of hybridizing to a nucleic acid molecule encoding the human Rptl 
protein presented in Figure 15 under conditions of high stringency. By way of 
example and not limitation, procedures using such conditions of high stringency are as 
follows: Prehybridization of filters containing DNA is carried out for 8 hours to 

10 overnight at 65°C in buffer composed of 6x SSC, 50 mM Tris-HCl (pH 7.5), 1 mM 
EDTA, 0.02% PVP, 0.02% Ficoll, 0.02% BSA and 500 µg/ml denatured salmon 
sperm DNA. Filters are hybridized for 48 h at 65°C in prehybridization mixture 
containing 100 µg/ml denatured salmon sperm DNA and 5-20 x 10^6 cpm of 32P- 
labeled probe. Washing of filters is done at 37°C for 1 h in a solution containing 2x 

15 SSC, 0.01% PVP, 0.01% Ficoll and 0.01% BSA. This is followed by a wash in 0.1 x 
SSC at 50°C for 45 minutes before autoradiography. Other conditions of high 
stringency which may be used are well known in the art. 



5.4. GENE DISCOVERY THROUGH PHYLOGENETIC 

ANALYSIS OF GENE FAMILIES 

20 The present invention provides a method for identifying novel genes 

comprising the following steps: (i) comparing a single sequence with a database; (ii) 
processing the output into a sequence alignment; (iii) computing gene trees; and (iv) 
analyzing the trees to predict the existence of undiscovered genes. 

Figure 5 shows a "species tree," a "gene tree" and a "reconciled tree". 
25 A "species tree", as defined herein, is a graph depicting the correct order of speciation 
events leading to a set of present day species as defined by taxonomy. A "gene tree" 
is a graphical representation of the evolution of a gene from a single ancestral 
sequence in a common progenitor to a set of present-day sequences in different 
species. Where gene duplication has occurred, a branch is bifurcated. The branch 
30 lengths of a gene tree are most frequently measured either in terms of the number of 
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Once developed, the specialized databases can be used to identify 
novel genes based on computation and analysis of phylogenetic trees for multigene 
families and analysis of homologous regulatory networks. 

In a specific embodiment of the invention, a specialized database was 

5 generated using a set of keywords defining proteins involved in apoptosis (see Figure 
4). The specialized sequence database was referred to as Apoptosis 3. As a first step 
in generating the specialized database, a comprehensive set of articles describing the 
system of apoptosis or programed cell death was compiled. The articles were 
analyzed and information on regulatory pathways characterizing apoptosis from a 

10 variety of different organisms was extracted. Such pathways included those involved 
in MHC-T cell receptor interactions, inflammatory cytokine signal transduction, 
induction by light, γ-radiation, hyperosmolarity or heat shock, pathways involving 
immunoregulatory receptors or receptors having cytoplasmic domains, integrin- 
related pathways and perforin/granzyme-related pathways. The collected 

1 5 information was stored using Powerpoint (Microsoft) as a collection of graph/plots 
depicting the regulatory pathway. In addition, a list of proteins relevant to regulation 
of apoptosis was compiled. 

Using the program Psi Retriever, sequences encoding the proteins 
relevant to regulation of apoptosis were retrieved from the non-redundant (NCBI) 

20 database of protein sequences and stored as a FASTA file. The FASTA file was then 
converted to a binary blast database using the program FORMATDB from the 
BLAST suite of programs. The BLAST suite of programs provides a set of programs 
for very fast comparisons of a single sequence to a large database. Both the database 
and the search or query sequence can be any combination of nucleotide and/or amino 

25 acid sequences. 

In a working example described herein, the Apoptosis 3 database was 
used to compare genomic and cDNA sequences derived from the 13q region of human 
chromosome 13. This region of the chromosome is associated with Chronic 
Lymphocytic Leukemia (CLL). Using this method of analysis a human gene with 

30 significant homology to the mouse Rptl gene was identified. When the activity of 
Rptl is knocked out in mice, the regulatory effect is manifested as a block in T- 
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interactions between them. Such databases are particularly useful for computation 
and analysis of regulatory networks between proteins. The semantic model is designed 
for representing substances, such as proteins and actions between them, and is based 
on widely accepted principles of object-oriented programming languages such as Java. 
5 Figure 3 is a diagram illustrating the object representation of molecules and relations 
between them. As indicated in Figure 3 there are six major classes, corresponding to 
the top-level classification of objects and actions: (i) a substance; (ii) a state of a 
substance; (iii) a similarity between substances; (iv) an action between substances; (v) 
a result of the action; and (vi) a mechanism that enables an action. 

1 0 Figure 3 presents the class design graphically, listing the variables that 

represent the properties of each class or class object in the implementation. Classes 
can be made nested via the mechanism of "inheritance", i.e., classes are defined 
starting with the most general ones and moving towards more specific classes. 
Definition of more specific classes is simplified because the properties of the general 

15 classes are "inherited" by the specific classes and need not be redefined each time 

(see, Flanagan 1997, Java in a Nutshell, Second Edition. O'Reilly & Associates, Inc. 
Sebastopol, CA). 

As shown in Figure 3, the two key object types in this scheme are 
substances (nodes of the graph representing regulatory networks) and actions 

20 (oriented edges connecting pairs of nodes), while result and mechanism objects are 
auxiliary to object action. Each substance object is characterized with a state. In this 
scheme, action is the most complicated object; each action object is characterized by a 
specific pair of substances participating in the action, one of which can be active and 
is referred to as Subject Substance and the second of which can serve as a substrate for 

25 the former and is referred to as Object Substance. Furthermore, for each action the 
initial and final states corresponding to interacting substances are defined. The 
property Time Required of each Action Object allows the setting of different 
durations for different actions (time is measured in relative units; see René Thomas 
and Richard D'Ari, 1990, "Biological Feedback," CRC Press Boca Raton, Ann Arbor, 

30 Boston). 
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Hidden Markov Model method for building domain/motif models include neural 
network motif analysis (Wu, C.H. et al., 1996, Comput Appl Biosci 12, 109-18; Hirst, 
J.D., 1991, Protein Eng 4:615-23) and positional weight matrix analysis (Claverie, 
J.M., 1994, Comput Chem 18:287-94; Venezia, D., 1993, Comput Appl Biosci 9:65- 
5 9; Bucher, P. 1996, Comput Chem, 20:3-23; Tatusov, R.L., 1994, Proc Natl Acad Sci 
USA 91:12091-5). 

Once a comprehensive collection of motifs/domains is created, each 
particular protein may be compared against a complete database of HMMs to identify 
known motifs and domains. 
10 The Hidden Markov Model (HMM) is built using the following steps: 

A1. Start with a motif/domain name and a single amino acid sequence 

representing a domain or motif. 
A2. Do PSI-BLAST (BLASTPGP) search with the motif/domain sequence 
against a protein non-redundant database. 
15 A3. Retrieve the sequences identified in the database search from the 

protein sequence database. Exclude low-complexity sequences, short 
or incomplete sequences and sequences with similarity score above a 
selected threshold of PPD value <0.001 
A4. Align the set of sequences with CLUSTALW (or other multiple 
20 sequence alignment program). 

A5. Use the set of aligned sequences for building HMM with the programs 
provided with HMMER and HMMER2 packages (see Hughey and 
Krogh 1996, J. Mol. Biol. 235:1501-1531). 
A6. Do a new database search comparing new HMM with the non- 
25 redundant protein database. 

A7. Continue steps A3-A6 until the convergence of the Markov model, i.e., 
until no new sequences are identified, or the maximum allowed 
number of iterations as defined by the user is reached. (Hughey R. and 
Krogh A., 1996, Comput. Appl. Biosci. 12: 95-107). 
30 In addition, in yet another embodiment of the invention, a specialized 

database may be designed to contain a semantic model of proteins and of the possible 
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For example, but not by way of limitation, a specialized database may 
be prepared as follows. Protein and gene sequences may be provided, for example, by 
the Java program PsiRetriever which allows for quick retrieval of protein or nucleotide 
sequences from binary BLAST databases by sequence accession number, keyword or 
5 groups of keywords, or species name. In addition, using the program PsiRetriever, 
sequences encoding the proteins of interest may be retrieved from the non-redundant 
(NCBI) database of protein sequences and stored as a FASTA file. The FASTA file is 
then converted into a binary blast database using the program FORMATDB from the 
BLAST suite of programs. 

10 Known motifs/domains for proteins may also be collected using the 

flat file versions of major protein databases, such as SwissProt 
(http://expasy.hcuge.ch/sprot) and the non-redundant database of NCBI 
(http://www3.ncbi.nlm.nih.gov). The databases can be downloaded and searched for 
the keywords "motif" and "domain" in the feature tables of proteins. In addition, 

1 5 existing databases of motifs and domains, such as BLOCKS 

(http://dupsas.Weizmann.ac.il/bcd/bcdparent//databanksblocks/html) and 
pfam (http://www.sanger.ac.uk//software/pfam; http://pfam.wustl.edu), can be 
downloaded (Henikoff et al., 1991, NAR 19:6565-6572). Still further, it is understood 
that any publicly available database containing gene/protein sequences may be 

20 utilized to generate the specialized databases for use in the practice of the present 
invention. 

Homologous sequences may be aligned using, for example, the 
CLUSTALW program (Higgins, et al. 1996 Methods in Enzymology 266: 383-402). 
A protein's sequence corresponding to each domain/motif can be identified, saved 

25 and used for building a Hidden Markov Model (HMM) of the domain/motif using a 
HMMER and HMMER2 packages (see, Durbin, R. et al. 1998 in Biological Sequence 
Analysis: Probabilistic Models of Proteins and Nucleic Acids). HMMER and 
HMMER2 packages are useful for (i) building HMMs from sets of aligned protein or 
nucleotide sequences, and (ii) comparing the HMMs with sequence databases aimed 

30 at identifying significant similarities of HMMs with database sequences. Both 
nucleotide and protein databases can be used for this purpose. Alternatives to the 
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Working with nucleotides implies that errors involving reading frames 
must be addressed. For example, working with a code of four letters, the nucleotide 
combination ATCTGTCACG could mean ATCT/GTCA or TCTG/TCAC or 
CTGT/CACG . Since the text is translated into a nucleotide combination, only one of 
5 these possibilities is correct. But BLAST can not distinguish between these solutions, 
i.e., BLAST would potentially match a database sequence to a wrong reading frame in 
the query sequence, producing many nonsense results that could compromise the 
significance of true results. 

The solution to this problem is a comma-free code. A comma-free code 

10 knows only one correct reading frame. BLAST therefore does not produce any 

nonsense results. A comma-free code consists of only one permutation of a nucleotide 
combination. For example, given the nucleotide combination ATCC and its 
permutations CATC, CCAT and TCCA, only ONE of these permutations would be 
included in a comma-free code. The code in Appendix E does represent a comma-free 

15 code. Comma-free codes were discussed in the early days of DNA research (Crick et 
al., Proc. Natl. Acad. Sci. 43:416-421). 

In order to fine-tune the matching process, different BLAST 
parameters must be adjusted, for example: word size (which sets the size of the high 
scoring words, thus influencing the sensitivity of finding HSPs); mismatch penalty 

20 (exact vs approximate matching); numbers of alignments to show (true matches of low 
significance can sometimes be at the very end of the BLAST output, therefore many 
alignments have to be shown); and expectation value (which sets the significance 
value for matches in the output file). 

5.3. GENERATION OF SPECIALIZED DATABASES 
25 In accordance with the present invention, specialized databases may be 

developed that contain information derived from unpublished data, publications such 
as research articles, theses, posters, abstracts, etc. and/or databases concerning 
interactions among genes and proteins, their domain/motif structure, and their 
biological functions. 
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In addition, the script looks for plurals of words. For example, " interleukins" should 
be recognized as a protein name, although only the singular form, "interleukin", is in 
the database. 

The final result consists of the original journal article with XML tags 
5 surrounding the gene and protein names. This is done using the same script as in 
Appendix G: 

blocked <phr sem="gp">T cell antigen receptor</phr> (TCR)- and 
<phr sem="gp">CD28</phr>-mediated <phr sem="gp">IL-2</phr> gene 
transcription. Therefore, <phr sem="gp">Rap1</phr> functions as a negative 
10 regulator of... 

To adapt the problem to BLAST's statistical foundation, different 
measures were undertaken to limit the output to the most relevant gene and protein 
names. 

BLAST is sensitive to the search space the program works in. Thus, 

1 5 given a long query sequence and a large sequence database, matches have a lower 
statistical significance because the chances are higher that the matches could have 
occurred by chance alone. In addition, matches with few letters have a lower statistical 
significance than matches with many letters. In order to find all true matches with any 
significance level, some measures were undertaken to address this problem. For 

20 example, (i) the query sequence was divided into 10 equal length parts, i.e., the 

journal article was divided into 10 parts and 10 different queries are run on each part 
separately; (ii) the sequence database (with the gene and protein names) is separated 
into 5 databases, each containing protein/gene names of different length; (iii) gene 
and protein names with less than 3 letters in the database were 'expanded', i.e., spaces 

25 were added at the beginning and the end of the name. Doing so, the statistical 

significance of a match containing a short name was higher. A space does not only 
include an empty character. For example, a gene name "k4" could occur in a journal 
article as "kinin 4 (k4)". It was therefore important to define several characters as 
substitutes for a space character. The alphabet in Appendix E defines the nucleotide 

30 combination ATCC as such a substitute. 



BNSDOCID: <WO_0Cie3e87A1_L> 



WO 00/63687 



PCT/US00/10302 



21 

using a Perl-script (see Appendix F). The script shown in Appendix G scans the 
output file, which is sometimes several megabytes long, for any segments that start at 
position 1 of the database sequence (thus disregarding any segments that are only part 
of the sequence). In addition, the script allows for 10% mismatches between the 
5 aligned sequences for long sequences (as shown in the script of Appendix E), or 0% 
mismatches for short sequences. After scanning the output file, an intermediary file 
that lists the candidate sequences is created: 

tran|365|381|gp|18493

tran|1|17|gp|18493
10 peci|549|565|gp|58106 

il-2|621|637|gp|82396 

il-2|325|341|gp|82396 

gati|193|209|gp|92088 

prod|641|657|gp|52292 
15 rapl|105|121|gp|49898 

spec|545|561|gp|33183

crip|385|401|gp|118905

crip|21|37|gp|118905

as|161|177|gp|133961 
20 her|65|77|gp|88411 

The intermediary file lists the name of the sequence, followed by the 
starting and end point in the query sequence (corresponds to where the two sequences 
matched), the semantic class of the name (protein, gene or protein/gene). The last 
number is not considered. 

25 The intermediary file is then scanned by another Perl program 

(Appendix G). This program compares the starting and end points with the actual text,
making sure that the matched name is an 'autonomous' entity in the query text. For 
example, while "per" in " per gene" should be recognized as a gene name, "per" in 
"personal" should not be recognized as a gene name. The program recognizes other 

30 characters than the space character delimiting an 'autonomous' gene or protein name. 
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the absence of costimulation, T cells activated through their antigen ..." is translated 
into 

"AAGTACAGATCCACGGAAGGAACGATCCAAACAAAGACGCAACGACAG 
AAATAACGATCCACATAACTATCCAAATACATACGCACGGAAGTACACAC 
5 GTAATTAAACACGGAAGTACATACAGATCCATCCACGGATCCAAATAACG 
AATTAATTACGCATCCAAACAAATACGGAAGTACTCAAACACGGAACGAA 
CCATCCACGGAAGGACCTACATACGTAAGCAAGGATCCACGGAAGGAAC 
GAAGTACCTATCCAAACACAGACGGAAGTAAGCAACGACAGATCC " 
A query is then used to match the translated journals against the 
1 0 nucleotide representation of gene and protein names in the BLAST database. The 

query is executed using the blastall program that is included in the BLAST package. 
The query line looks like: 

blastall -p blastn -d FASTA.dat -i query.txt 
The flag 'p' denotes the sub-program (blastn is a sub-program of
1 5 blastall that performs nucleotide matches), 'd' denotes the file that contains the
FASTA entries and 'i' denotes the translated query text. 

Significant alignments associated with gene and protein names are 
listed in the BLAST output file. This is an excerpt from a BLAST output file: 
gi|63624 species,gp,ner 
20 Length = 12

Score = 24.4 bits (12), Expect = 3e-05 
Identities = 12/12 (100%) 
Strand = Plus / Plus 
Query: 729 acagaacgacct 740 
25 Sbjct: 1 acagaacgacct 12 

The first line denotes the database entry. The second line denotes the 
database sequence length, followed by the alignment score and the E-value. The next 
line indicates paired matches, mismatches and gapped alignment (the latter two are 
not shown in this example). The lines 'Query' and 'Sbjct' show the actual alignment
30 between the query and database sequence. This output file is subsequently processed 
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hox al 
wac 3'-end
pit-l/ghf-1 variant 
[...] 

5 This list of gene and protein names is translated into a different 

alphabet system by substituting each character in the name with a predetermined 
unique nucleotide combination. The conversion chart is listed in Appendix E. The 
encoded names are then imported into the BLAST database using the FASTA format. 
For example, the first entry in the list above is "gfap gamma." After translation using 
10 the conversion chart, the same name appears as follows: 

AAGCAACTAAACACCCATCCAAGCAAACACACACACAAAC
Thus, the complete FASTA entry looks like this: 
>gi|1 species,gp,gfap gamma

AAGCAACTAAACACCCATCCAAGCAAACACACACACAAAC 

1 5 In FASTA, the definition line (marked with '>') contains information

about the database entry. This line can contain any kind of information. The 
information important for this particular example is the third entry in the definition 
line, 'gp', that specifies that the name can represent a gene or a protein. If the name is 
unambiguous, then the definition line states that the name is only associated with a

20 gene ('g') or protein ('p'). The fourth entry in the definition line is the name of the
protein or gene, "gfap gamma" in this case. 

The second line in the FASTA format normally contains the actual 
sequence of the protein/gene. In the example presented, the second line contains the 
translated protein or gene name. 

25 All gene and protein names are translated into the nucleotide 

representation and converted into the FASTA format. Then, the database containing 
these FASTA entries is specially compiled for use in BLAST queries using a
program that is included in the BLAST package called "formatdb". 

Thus, the scientific journals are translated, using the same nucleotide 

30 combinations, into a continuous string of nucleotides. For example, the sentence "In 
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signifying that the ^ent has not been specified; the second argument is a protein with 
the value jnk. The second argument is the target: 

[action,inactivate,[protein,bad],[action,phosphorylate,x,[protein,jnk]]]
In summary, a computer system has been disclosed that generates 
5 structured information concerning protein and gene interactions and relationships. 

5.2. USE OF BLAST FOR FINDING GENE AND 
PROTEIN NAMES IN JOURNAL ARTICLES 
In a specific embodiment of the invention, an exhaustive list of gene 
and protein names, extracted from GeneBank, is translated into a different alphabet 

10 system by substituting each character in the name with a predetermined unique 
nucleotide combination. The encoded names are then imported into the BLAST 
database using the FASTA format. The scientific journals are translated, using the
same nucleotide combinations, into a continuous string of nucleotides. A query is then 
used to match the translated journals against the nucleotide representation of gene and 

1 5 protein names in the BLAST database. Significant alignments associated with gene 
and protein names are listed in the BLAST output file, which is subsequently 
processed using Perl-scripts. The final result consists of the original journal article 
with XML tags surrounding the gene and protein names. 

To adapt the problem to BLAST's statistical foundation, different

20 measures were undertaken to limit the output to the most relevant gene and protein 
names. In addition, in order to fine-tune the matching process, different BLAST 
parameters were adjusted, such as the word size (which sets the size of the high 
scoring words, thus influencing the sensitivity of finding HSPs) and mismatch penalty
(exact vs approximate matching). 

25 In a specific embodiment of the invention, gene and protein names are 

extracted from GeneBank's gene symbol index file. The following is an excerpt of the 
file after discarding entries that are either composed of only numbers or of less than
two alphabetic letters: 

gfap gamma 

30 hoxa10
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repeated until an analysis of each segment is obtained or until segmenting is no longer 
possible. 

Mode 3 requires a well-formed pattern for the "largest" prefix of the
segment, i.e., usually at the beginning of the segment. This occurs when a sentence 
5 contains a pattern at the end which is not in the grammar but a beginning portion that
is included. For example, in "bad inactivates jnk at this time", the beginning of the 
sentence "bad inactivates jnk" will be parsed and the remainder will be skipped. 

Mode 4 requires that undefined words be skipped and an analysis be 
attempted in accordance with Mode 1 . Mode 4 is useful where there are 
10 typographical errors and unknown words. For example, in the phrase "abc bad 

inactivates jnk", the word abc is unknown to the system and will be ignored but the
remainder of the phrase will be parsed. 

Mode 5 first requires that the first word or phrase in the segment 
associated with an action be found. Next, an attempt is made to recognize the phrase 
15 starting with the leftmost recognizable argument. For example, in "during bad 

inactivates jnk on the fifth day," the phrase "bad inactivates jnk" will be parsed and 
the remaining words will not be. If no analysis is found, recognition is retried at the 
next possible argument to the right. This process continues until an analysis is found. 

Process_sects with get_section and parse_sentences gets each section
20 and generates intermediate output for the sentences in each section. 

Write produces the output as a list consisting of relations and 

interactions 

Setargs sets arguments or parameter values based on user input or by 

default. 

25 The structured output generated by the GENIE program uses a frame-

based representation. Each frame specifies the informational type, the value, and 
arguments or modifier slots which are also frames. Consider the text data input "bad 
inactivates the phosphorylation of jnk." A corresponding output, as shown below, is a 
frame denoting an action, which has the value inactivate; in addition, there are two

30 arguments. The first argument is a protein bad and the second argument is an action
with the value phosphorylate, which has two arguments. The first argument is x
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mode) and Protocol (html or plain). Process jents is called by another predicate, after 
user-specified parameters have been processed. 

The parsing modes are selected by GENIE so as to parse a sentence or 
phrase structure using a grammar that includes one or more patterns of semantic and 
5 syntactic categories that are well-formed. For example, for the phrase "bad inactivates 
jnk" a legitimate pattern can be substance1 action substance2, wherein substance1 =
protein bad, action = "inactivates" and substance2 = "jnk." However, if parsing fails, 
various error recovery modes are utilized in order to achieve robustness. The error 
recovery techniques use methods such as segmenting the sentence, processing large 

10 chunks of the sentence, and processing local phrases. Each recovery technique is 

likely to increase sensitivity but decrease specificity and precision. Sensitivity is the 
performance measure equal to the true positive rate of the natural language processing, 
i.e., the ratio of information extracted by the natural language processing system that
should have been extracted. Specificity is the performance measure equal to the true 

1 5 negative information rate of the system, i.e., the ratio of information not extracted by
the NLP system that should not have been extracted. Precision is the reliability of the 
system, i.e., the ratio of information extracted correctly compared to all the 
information that was extracted. In processing a report, the most specific mode is 
attempted first, and successive less specific modes are used only if needed. 

20 In accordance with the preferred embodiments of the present invention, 

the parser of Figure 2 includes five parsing modes, Modes 1 through 5, for parsing
sentences or phrases. Nominally, the parser is configured to first select Mode 1 . If 
Mode 1 is not possible, the program continues with Mode 2 and so forth until parsing 
is complete. With Mode 1 , the initial segment is the entire sentence and all words in 

25 the segment must be defined. This mode requires a well-formed pattern for the 
complete segment. 

Mode 2 requires that the sentence or phrase be segmented at certain 
types of words or phrases, e.g., " is attributable to." Here, an attempt is made to 
recognize each segment independently, i.e., a first segment ending with the word "is"

30 and a second segment beginning with the word after "to." The segmenting process is 
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formed syntactic and semantic patterns in the sentence and to generate structured 
output forms. The parser proceeds by starting at the beginning of the sentence element 
list and following the grammar rules. When a semantic or syntactic category is 
reached in the grammar, the lexical item corresponding to the next available 
5 unmatched element is obtained and its corresponding lexical definition is checked to
see whether or not it matches the grammar category. If it does match, the word or 
phrase is removed from the unmatched sentence list, and the parsing proceeds. If a 
match is not obtained, an alternative grammar rule is tried. If no analysis can be
obtained, an error recovery procedure is followed so that a partial analysis is 

10 attempted. The actual grammar used for GENIE appears as Appendix D. 

The parser module of GENIE uses the lexicon, and a grammar module 
to generate target forms. Thus, in addition to parsing of complete phrases, subphrase 
parsing can be used to an advantage where highest accuracy is not required. In case a 
phrase cannot be parsed in its entirety, one or several attempts can be made to parse a 

15 portion of the phrase for obtaining useful information in spite of a possible loss of 
information. 

Conveniently, each module is software-implemented and stored in 
random-access memory of a suitable computer, e.g., a work-station computer. The 
software can be in the form of executable object code, obtained, e.g., by compiling 

20 from source code. Source code interpretation is not precluded. Source code can be in 
the form of sequence-controlled instructions as in Fortran, Pascal or "C", for example. 
Alternatively, a rule-based system can be used such as Prolog, where suitable
sequencing is chosen by the system at run-time. 

An illustrative portion of the GENIE system is shown in the Appendix 

25 D in the form of a Prolog source listing with comments. The following is further to 
the comments. 

Process_sents with get_inputsents, process_sects and outputresults
reads in an input stream, processes sections of the input stream according to parameter 
settings, and produces output according to the settings, respectively. Among 
30 parameters supplied to Process_sents are the following: Mode (specifying the parsing
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syntactic and semantic. The syntactic lexicon for actions specifies the main syntactic 
category such as "v" for verb, "ving" for progressive form of verb, and "activation"
for noun. The semantic entries for actions not only categorize the actions, but also 
specify features for each action. For example, one feature provides the number of 

5 arguments that are expected for the action, i.e., some actions are associated with two 
arguments because they have an agent and a target as "inactivate", and others just 
have an agent "mutate." The lexicon of substances and structures appears as 
Appendix A; the syntactic lexicon for actions appears as Appendix B; and the 
semantic lexicon of actions appears as Appendix C. 

10 A second feature specifies whether or not the arguments should be 

reversed when obtaining the target form. For example the arguments of "attributable
to" should be reversed (i.e., in "the phosphorylation of jnk is attributable to the
activation of bad", the underlying action is "cause" (from "attributable to"), the agent 
is the "activation of bad" and the target is "the phosphorylation of jnk"), whereas the

1 5 arguments of "activates" are not (i.e., in "jnk activates bad", the agent is "jnk" and the
target is "bad"). 

Figure 2 shows a preprocessor module of GENIE by which natural- 
language input text is received. The preprocessor thus performs lexical lookup to 
identify and categorize multi-word and single word phrases within each sentence. The

20 output of this component consists of a list of word elements where each element is 
associated with a word or multi-word phrase in the report. For example, assuming 
that the sentence "bad functions as a negative regulator of the activation of jnk" is at 
the beginning of the report, it would be represented as a list of elements where each 
element is a word or phrase. For example, element 1 is associated with "bad", 

25 element 2 with the multi-word phrase "functions as a negative regulator of", element 8
with "the", and element 9 with "activation". The remainder of the list of word 
positions would be associated with the remaining words in the report. Some of the 
phrases may not need lexical lookup because they already have been tagged by a 
previous component. Such a tagging system is described below in Section 5.2. 

30 The second component of the GENIE system is the parser. It utilizes 

the grammar and categories assigned to the phrases of a sentence to recognize well- 
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A natural-language phrase included in text document is understood as a 
delimited string comprising natural-language terms or words. The string is computer 
readable as obtained, e.g., from a pre-existing database, a keyboard input, optical 
scanning of typed or handwritten text, or processed voice input. The delimiter may be 

5 a period, a semicolon, an end-of-message signal, a new-paragraph signal, or any other 
suitable symbol recognizable for this purpose. Within the phrase, the terms may be 
separated by another type of delimiter such as a blank or another suitable symbol 

As a result of phrase parsing, terms in a natural-language phrase are 
classified, (e.g., as referring to a gene, a protein, or their interactions) and the 

1 0 relationships between the interactions are established and represented in a standard 
form. For example, in the sentence "Rap inhibited fyn", the structured form would 
be: 

[action,inactivate,[protein,rap],[protein,fyn]]. 
In such an example, the interaction is "inactivate", the agent is "Rap" and the target 
1 5 is "fyn." More complex sentences consisting of nested relationships, such as "The
activation of BAD was suppressed by the phosphorylation of JNK" can also be parsed 
and represented appropriately. The structured output form for this sentence would be: 
[action,inactivate,[action,phosphorylate,x,[protein,jnk]],[action,activate,x,[protein,bad]]

] 

20 In the first example, the primary interaction is "inactivate"; in the second example, an 
interaction "phosphorylate" is the agent where the protein "jnk" is its target (the agent 
of "phosphorylate" is not specified and thus is represented as "x"). In this example,
the target of "inactivate" is also an interaction "activate" where the target is the 
protein "bad" and the agent is unknown. 

25 While parsing is based on both syntactic and semantic grammatical 

patterns, the substances in a domain are normally only semantic categories such as 
"protein", "gene", and "small molecule." There are no corresponding syntactic 
categories needed for these substances because they are normally all nouns. However, 
each action can be categorized both semantically and syntactically. An action, which 

30 is a semantic category, can generally occur syntactically as a verb "inactivate" or as a 
noun "inactivation." Therefore there are two sets of lexical entries for the actions: 
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5.1. THE NATURAL LANGUAGE PROCESSING 
The present invention relates to a natural language processing system 
that is designed to parse the electronic versions of articles published in journals that 
report on structural interactions among genes and proteins. The system provides a 
5 method for extracting information on interactions among genes and proteins, their 
domain/motif structure, and/or their sub-cellular and tissue expression/distribution 
patterns, followed by computer representation of such information. 

The general natural language-processing system of the invention is 
schematically depicted in Figure 2. The collection phase automatically collects 

1 0 articles from appropriate literature, and selects articles that contain relevant 

information using Keyword search techniques. In the next phase, the preprocessor 
standardizes the selected articles so that they consist of tagged ASCII text where the 
tags delineate critical components of the article. The next phase, termed the extraction 
phase, retrieves and classifies biological entities, as names of proteins, genes and 

1 5 small molecules. In addition, the relationship extraction phase recovers structural 

relationships between the entities. This phase is followed by a phase which performs 
an analysis of the sequence of events. 

The final phase of the system processes the output extracted from an 
article to remove redundancies, inconsistencies and to incorporate implicit 

20 information before adding the extracted knowledge consisting of biological entities, 
their attributes, conditional constraints, and relationships between them, for 
subsequent use in analysis and hypothesis testing. The information extraction system 
as depicted in Figure 2, referred to herein as "GENIE," is designed for use as a general 
processor within the domain of genomics literature although the system may also be 

25 used in other specialized domains. GENIE is an adaptation of MedLEE developed for 
the medical domain. GENIE uses the same source code as MedLEE but the Lexicons 
and grammar were adapted for genomics literature. 

The information extraction system of the present invention is described 
below, by way of example, with reference to the genomics domain uses of GENIE. It 

30 is written in Quintus Prolog and uses the Unix or Windows operating systems, as 
described in detail below. 
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Figure 17B. Nucleotide sequence of the human Mad3 gene. 

Figure 17C. Complete sequence of the human Mad3 protein. A search 
was conducted to identify overlapping sequences. The complete sequence of the gene 
was assembled and the amino acid sequence deduced. The translated human Mad3 
5 sequence consists of 206 amino acid residues 81% of which are identical to the mouse 
Mad3 protein. 

Figure 17D. Multiple alignment of the human Mad3 amino acid 
sequence with known Mad proteins. 

Figure 18A. Phylogenetic tree indicating relationship between three 
1 0 known mouse Mad genes and their two human homologs. 

Figure 18B. Phylogenetic tree including new human Mad3 sequence.
The phylogenetic tree indicates that the new human gene belongs to the family of Mad 
proteins and is an ortholog of mouse Mad3. 

15 5. DETAILED DESCRIPTION OF THE INVENTION 

The present invention provides methods for identification of novel 
genes comprising: (i) generating specialized databases containing information on
gene/protein structure, function and regulatory interactions and, (ii) sequence analysis 
which includes homology searches and motif analysis thereby identifying a putative 

20 novel gene of interest. The invention may further comprise performing simulation and 
hypothesis testing to identify or confirm that the putative gene is a novel gene of 
interest. 

The specialized databases are constructed utilizing information 
concerning gene/protein structure or function derived from unpublished data, research

25 articles and/or existing databases. The specialized databases can be used to identify 
novel genes by: (i) searching for motif/domain combinations characteristic for a
putative gene of interest; (ii) phylogenetic tree analysis of homologous genes for 
predicting the existence of yet undiscovered genes; (iii) comparing members of 
interactive gene/protein networks from different species for predicting the existence of 

30 yet undiscovered genes; and (iv) testing a hypothesis with regard to known 
interactions of homologs from other species in regulatory pathways. 
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Figure 11 A and 1 IB are diagrams respectively representing 
hypothetical examples of evaluating the impact of a "knockout" of hypothetical gene 
A on the expression of a hypothetical gene B. The effect of knock-out of gene A 
calculated by multiplication along the shortest pathway connecting genes A and B is 
5 inhibition of gene B, the resulting effect being zero if the orientation of only one arc in 
the same pathway is reversed; 

Figure 12 is a flow chart representing the scheme of gene discovery 
analysis involving motif/domain analysis in accordance with the present invention; 
and 

1 0 Figure 13. Identification of genes in C. elegans containing either POZ

or kelch domains. The protein accession numbers are indicated adjacent to the
different protein domains, the protein corresponding to accession number gi/1132541
contains a POZ domain, death domain, kinase domain and heat repeat. 

Figure 14A. Two human sequences with the closest homology to the C.
1 5 elegans sequence gi/1132541.

Figure 14B. Computed gene tree indicating that the identified human 
gene represents an ortholog of the C. elegans gene gi/1132541.

Figure 14C. Nucleotide sequence of the death domain gene. 

Figure 14D. Deduced amino acid sequence of the death domain 

20 protein. 

Figure 15. Identification of candidate gene implicated in the etiology of 
Chronic Lymphocytic Leukemia (CLL). Sequence homology between a CLL region 
open reading frame and mouse Rpt1 (sp/P15533/RPT1) is presented.

Figure 16A-B. Model of regulatory functions of Rpt1. Figure 16A
25 indicates that in mouse T lymphocytes Rpt1 serves as a repressor of the gene for

interleukin 2 receptor (IL-2R). Figure 16B demonstrates that when Rpt1 is knocked
out, the regulatory effect is manifested as a block of the apoptotic pathway for T- 
lymphocytes resulting in accumulation of T-lymphocytes in blood. 

Figure 17A. Two EST sequences identified by searching a protein 
30 dbEST using the mouse Mad3 protein as a query. 
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is a graph depicting a history of a few genes from the same species, where each 
species can be represented by multiple paralogous genes (because the set of known 
genes is incomplete for most genomes, and there are often multiple representations of 
the same gene family in the same genome, the gene tree can be drastically different 
5 from the corresponding species tree); and a "reconciled tree", which is the gene tree 
that would be obtained if gene deletions were completely forbidden and all genes were 
known for all species under analysis; 

Figure 6 shows the original tree of ALDH sequences, indicating 
sequence clusters where bacterial, plant, fungal and nematode orthologous genes are
1 0 present, but a human ortholog was not yet known; 

Figure 7 shows the same phylogenetic tree as in Figure 6 with an 
additional human protein, referred to as antiquitin which was discovered by the 
method of the invention; 

Figure 8 is a schematic diagram illustrating functional network-based 
1 5 gene discovery in accordance with the present invention; 

Figure 9A presents diagrams depicting the regulatory relationships 
among hypothetical proteins (denoted with Arabic numerals) of hypothetical species 
A and B. Proteins in different species denoted with the same numeral are considered 
orthologous. The diagrams show that regulatory relationships between a pair of 
20 proteins can be of three different kinds; 

Figure 9B, 9C, and 9D are diagrams representing Boolean operations 
OR, AND, and XOR, on arcs of the two oriented graphs of Figure 9A, the same 
operations being applicable to the set of vertices of the two oriented graphs; 

Figure 10 is a diagram representing a hypothetical example of defining 
25 homologous protein networks in two different species using protein motifs, the 

diagram showing only two hypothetical proteins (1 and 2) for species A and three 
hypothetical proteins (1, 3, and 4) for species B. Protein 1 in both species has motifs 
α and β, protein 2 has motifs δ, ε, and ζ, and proteins 3 and 4 have motifs δ and ζ,
and ε, respectively. The motif analysis can indicate that proteins 3 and 4 in species B
30 may collectively perform the same function as protein 2 in species A; 
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predict undiscovered genes. This program also includes a set of tools for generating 
motif/domain models from multiple sequence alignments of known genes and for 
using these models for extraction of structurally and/or functionally homologous 
sequences from databases which contain raw sequence data. 

5 The invention further provides for a simulation and hypothesis testing 

program which relies on the specialized databases of gene/protein interactions for 
identifying potentially undiscovered members of multigene families through 
comparisons of regulatory networks for different species and testing hypotheses with 
regard to regulatory cascades. A comparison of homologous regulatory networks 

10 within the same organism and between different species of organisms will allow the 
identification of genes absent in one of the systems under comparison, thus providing 
a set of candidate genes. In this way, genes that contribute to the phenotype of a 
specific disease associated with a particular biological system under analysis may be 
identified, mapped and subjected to mutational analysis and functional studies. 



15 4. BRIEF DESCRIPTION OF THE DRAWINGS 

Figure 1 is a block diagram illustrating the three major programs of the 
method according to the present invention: (i) the generation of specialized databases 
based on information on gene/protein structure, function and regulatory interactions 
derived from research papers and databases; (ii) sequence analysis; and 
20 (iii) simulation and hypothesis testing; 

Figure 2 is a block diagram of an information extraction system in 
accordance with a preferred embodiment of the present invention; 

Figure 3 is a diagram illustrating the object representation of molecules 
and relations between them; 
25 Figure 4 shows a set of keywords defining proteins involved in 

apoptosis pathways, these keywords having been utilized for generating a specialized 
sequence database ApoptosisS, this list having been compiled manually for testing the 
concept of specialized databases; 

Figure 5 shows a "species tree," which is a graph depicting the correct 
30 order of speciation events leading to a set of present day species; a "gene tree," which
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3. SUMMARY OF THE INVENTION 
In accordance with the present invention there is provided methods for 
identification of novel genes comprising (i) generating one or more specialized
databases containing information on gene/protein structure, function and/or regulatory 

5 interactions; and (ii) searching the specialized databases for homology or for a 
particular motif and thereby identifying a putative novel gene of interest. The 
invention may further comprise performing simulation and hypothesis testing to 
identify or confirm that the putative gene is a novel gene of interest. 

The invention is based, in part, on the observation that functionally 

10 similar regulatory systems are generated during evolution by genetic duplication of 
ancestral genes. Thus, by comparing phylogenetic trees or regulatory networks and 
identifying genes and/or proteins absent in one system under comparison, the 
existence of as yet unidentified genes and/or proteins can be predicted. To make 
meaningful comparisons of phylogenetic trees it is necessary to distinguish between 

1 5 orthologs and paralogs. The present invention provides a method useful for 

discriminating between orthologs and paralogs and inferring the existence of as yet 
unidentified genes and/or proteins. 

The present invention relates to natural language processing and 
extraction of relational information associated with genes and proteins that are found 

20 in genomics journal articles. Specifically, the natural language processing system of 
the invention is used to parse the articles published in biological journals focusing on 
structure and interactions among genes and proteins followed by computer 
representation of such interactions. 

In accordance with the present invention, specialized databases are 

25 developed that contain information on gene/protein structure and interactions based on 
information derived from preexisting databases and/or research articles including 
information on interactions among genes and proteins, their domain/motif structure 
and their subcellular and tissue expression/distribution patterns. 

The invention relates to a sequence analysis program which utilizes the 

30 specialized database for comparison of a single sequence, processing the output into a 
sequence alignment, computing phylogenetic trees, and analyzing these trees to 
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Two homologous genes taken from different species that originate 
from the nearest common ancestor by speciation are referred to as orthologs, while 
any two genes that originate from a common ancestor via a series of events involving 
intragenomic duplications are called paralogs. Tatusov et al. (1994, Proc. Natl. Acad. 
5 Sci. USA 91:12091-12095) describe comparisons of proteins encoded by the genomes 
of different phylogenetic lineages and elucidation of consistent patterns of sequence 
similarities permitting the delineation of clusters of orthologous groups (COGs). Each 
COG consists of individual orthologous genes or orthologous groups of paralogs from 
different phylogenetic lineages. Since orthologs typically have the same function, the 

10 classification of known genes and proteins into clusters of orthologous groups permits 
the assignment of a function to a newly discovered gene or protein by merely 
classifying it into a COG. Although Tatusov describes a method for assigning a 
function to a newly discovered gene, he does not describe a method for predicting the 
existence of undiscovered genes. In addition, Yuan et al. attempted simultaneous 

1 5 reconstruction of a species tree and identification of paralogous groups of sequences 
and detection of orthologs in sequence databases (Yuan et al., 1998, Bioinformatics 
143:285-289). 

Other groups have aimed at capturing interactions among molecules 
through the use of programs designed to compare structures and functions of proteins 

20 (Kazic 1994, In: Molecular Modeling: From Virtual Tools to Real Problems, 

Kumosinski, T. and Liebman, M.N. (Eds.), American Chemical Society, Washington, 
D.C. pp. 486-494; Kazic, 1994, In: New Data Challenges in Our Information Age 
Glaesar, P.S. and Millward, M.T.L. (Eds.). Proceedings of the Thirteenth 
International CODATA Secretariat, Paris pp. C133-C140; Goto et al., 1997, Pac. 

25 Symp. Biocomput. p. 175-186; Bono et al., 1998, Genome Res. 8:203-210; Selkov 
et al., 1996, Nucleic Acids Res. 24:26-28). These projects are significantly different 
from the inventive methods described herein because they do not describe methods for 
deducing the existence of as yet unknown genes based on comparisons of regulatory 
pathways and gene structure between one or more species. The present invention 

30 provides a method for increasing the sensitivity of analysis methods through the 
generation of specialized databases. 
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loci, can hardly be found using this strategy because of the complications associated 

with multiple loci linkage analysis. 

Specialized databases for homology searches have also been utilized in 

disease gene discovery projects. In recent years a number of efficient sequence 
5 comparison tools have been developed such as the BLAST (Basic Local Alignment 

Search Tool) family of programs designed for comparison of a single "search 

sequence" with a database (see Altschul et al., 1990, J. Mol. Biol. 215:403-410; 

Altschul et al., 1997, Nucleic Acids Res. 25:3389-3402), the family of Hidden 

Markov Model methods for comparison of a set of aligned sequences that usually 
10 represent a protein motif or domain with a database (e.g., Krogh et al., 1994, J. Mol. 

Biol. 235:1501-1531; Grundy et al., 1997, Biochem Biophys. Res. Commun. 231:760- 

6) and various other comparison tools (Wu et al., 1996, Comput. Appl. Biosci 12:109- 
118; Neuwald et al., 1995, Protein Sci. 4:1618-1632; Neuwald, 1997, Nucleic Acids 

Res. 25:1665-1677). 

1 5 When used in disease gene discovery projects, homology searches can 

be enhanced by creating specialized databases that utilize statistical analysis for 
evaluating significance of sequence similarities in comparison of new sequences with 
a database of known sequence. Such databases are fine-tuned to the size of the 
database used (Altschul et al., 1990, J. Mol. Biol. 215:403-410; Altschul et al., 1997, 

20 Nucleic Acids Res. 25:3389-3402), so that the same level of homology between a 

search sequence and a database sequence can be determined to be highly significant if 
the search sequence is compared with a smaller database, or insignificant and thus 
undetectable, if the search sequence is compared with a larger database. 

In alternatives to standard homology searches, in projects oriented 

25 towards gene discovery, researchers usually have some a priori knowledge about the 
set of genes/proteins that might display important similarity to the unknown new gene. 
Therefore, selecting an a priori defined set of genes/proteins for comparison with new 
experimental sequences is a feasible and useful strategy. This strategy was 
successfully applied to search for homologs of disease genes in yeast and nematode 

30 genomes by Mushegian et al. (1997, Proc. Natl. Acad. Sci USA 94:5831-5836). 
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2,2. IDENTIFICATION OF NOVEL GENES 
A variety of different methods are currently utilized for the 
identification and characterization of novel genes. Perhaps the most widely used 
5 method for generating large quantities of sequence information is via high throughput 
nucleotide sequencing of random DNA fragments. A disadvantage associated with 
this gene discovery technique is that in most instances when genes are identified their 
function is unknown. 

For identification of specific disease genes, positional cloning is 

10 currently the most widely used method. The positional cloning approach combines 

methods of formal genetics, physical mapping and mutation analysis and usually starts 
with a precise description of the disease phenotype and a tracing of the disease 
through families of affected individuals. Genetic linkage data obtained from the 
analysis of affected families frequently allows the determination of an approximate 

1 5 genomic localization of the candidate disease gene with a precision of several millions 
of nucleotides. Once localized, the genetically defined chromosomal region is then 
recovered from genomic libraries as a contiguous set of genomic fragments. Genes 
residing in the disease-related region are determined by analysis of transcripts that are 
transcribed from the genomic fragment. From this analysis an initial set of candidate 

20 genes for a particular disease are identified based on the presence of the gene product 
in the biological system affected by disease and a correlation between its expression 
pattern and the pattern of disease progression. 

Important information for selection of candidate genes also comes from 
analysis of their homology with genes known to be part of the same or related 

25 biological system. Finally, the ultimate proof of association between a gene and a 

genetic disorder comes from mutational analysis of a gene in patients affected by the 
disorder and from demonstration of a statistical correlation between occurrence of 
mutation and the disease phenotype. 

Although positional cloning is a powerful method for gene discovery, 

30 the experimental method is extremely tedious and expensive. Moreover, disease 

genes implicated in genetically complex disorders, i.e., those controlled by multiple 
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Natural language processing is an automated system that provides for 
a complex of programs for automatic retrieval of information from text analysis and 
for the computer representation of that information in a form that allows efficient 
access and extraction of that information. MedLEE (Medical Language Extraction and 
5 Encoding System) has recently been successfully used for processing different types 
of medical texts as described in co-pending United States Patent Application Serial 
Number 09/370,329, incorporated herein in its entirety by reference (see also, 
Friedman et al., 1994, J. Amer. Med. Inf. Assoc. 1:161-174; Hripcsak et al. 1995, 
Ann. Intern. Med. 122:681-688; Hripcsak et al., 1998, Meth. Inform. Med.; Jain et al., 
10 1996, Proc. AMIA Annu. Fall Symp. 542-546; Knirsch et al., 1998). When tested, 
MedLEE was on average as successful in retrieving reports associated with specified 
clinical connections as twelve medical experts invited for evaluation of the system. 

Another text analysis technique has recently been developed that 
combines finite-state machines with statistical machine learning approaches. These 
1 5 models extract detailed semantic information from texts (e.g., see Hatzivassiloglou 
1996, In Klavens, J.L., and Resnick, P.S. (eds) The Balancing Act: Combining 
Symbolic and Statistical Approaches to Language, MIT Press, Cambridge, MA) when 
extensive prior knowledge about the domain is not available. The techniques have 
been subsequently applied to the tasks of (i) automatically identifying medical terms 
20 for the automated summarization of research articles reporting on clinical studies and 
(ii) sanitizing sensitive information in patient records so that they can be widely 
disseminated for research purposes. 

A number of projects have also been developed as statistical 
information extraction tools that operate with limited or no prior knowledge about the 
25 application domain. These earlier efforts include XTRACT, a tool that recovers 
collocational restrictions between words that has been licensed to more than thirty 
sites worldwide (Smadja, F., 1993, J. Comp. Ling. 19:143-177), CHAMPOLLION, a 
system that retrieves bilingual mappings between words and phrases in parallel texts 
from different languages (Smadja, F. et al. 1996, J. Computational Linguistics 22:1- 
30 38), and a system that automatically aligns noisy, semi-parallel texts from different 
languages (Fung, P. and McKeown, K.R., 1997, Machine Translation 11:23-29). 
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in genomics journal articles. To enable access to information in textual form, the 
natural language processing system of the present invention provides a method for 
extracting and structuring information found in the literature in a form appropriate for 
subsequent applications. Specifically, the present invention provides for the 
5 generation of specialized databases containing information on gene/protein structure, 
function and regulatory interactions based on the retrieval of such information from 
research articles and databases, and computer representation of such information in a 
manner that allows efficient access to the extracted information. 

The invention further provides for the use of the specialized databases 

1 0 for identifying novel genes based on detection of sequence similarities and 

domain/motif matches between genes/proteins, computation and interpretation of 
phylogenetic trees for multigene families, and analysis of homologous regulatory 
networks. The methods of the invention are based on the observation that functionally 
similar regulatory systems are generated during evolution by genetic duplication of 

1 5 ancestral genes. Thus, a comparison of homologous/similar networks within the same 
organism and between different species will allow the identification of genes absent in 
one of the systems under comparison. In this way genes that contribute to the 
phenotype of a specific disease associated with a particular biological system under 
analysis may be identified. 

20 2. BACKGROUND OF THE INVENTION 

2.1. NATURAL LANGUAGE PROCESSING 
Researchers working in molecular biology must constantly consider the 
information present in the literature relating to their regulatory systems of interest and 
the genes and proteins that operate within those systems. Unfortunately, to remain up- 

25 to-date on the relevant literature, the researcher is required to perform laborious 
reading and manual integration of research articles, each of which may address a 
narrow subject. Therefore, technology that enables rapid retrieval of information from 
literature and manipulation of derived functional data should have a dramatic effect on 
the access of the researcher to important facts and ultimately should facilitate the 

30 discovery of novel human genes. 
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GENE DISCOVERY THROUGH COMPARISONS OF NETWORKS 
OF STRUCTURAL AND FUNCTIONAL RELATIONSHIPS 
AMONG KNOWN GENES AND PROTEINS 

SPECIFICATION 

5 The invention described herein was funded in part by a grant from the 

National Library of Medicine, namely, Grant Numbers LM06274 and LM05627. 
The United States Government may have certain rights to the invention. The present 
specification contains a computer program listing which appears as a microfiche 
Appendix H. 

10 STATEMENT REGARDING MATERIAL SUBJECT TO COPYRIGHT 

A portion of the disclosure of this patent document contains material 
which is subject to copyright protection. The copyright owner has no objection to the 
facsimile reproduction by anyone of any portion of the patent document, as it appears 
in any patent granted from the present application or in the Patent and Trademark 
1 5 Office file or records available to the public, but otherwise reserves all copyright 
rights whatsoever. 

An appendix containing source code listing utilized in practicing an 
exemplary embodiment of the invention is included as part of the Specification. 

1. INTRODUCTION 

20 The present invention relates to methods for identifying novel genes 

comprising: (i) generating one or more specialized databases containing information 
on gene/protein structure, function and/or regulatory interactions; and (ii) searching 
the specialized databases for homology or for a particular motif and thereby 
identifying a putative novel gene of interest. The invention may further comprise 

25 performing simulation and hypothesis testing to identify or confirm that the putative 
gene is a novel gene of interest. 

The present invention relates to natural language processing and 
extraction of relational information associated with genes and proteins that are found 
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synw (signal , v) . 
% synw(Word, Category): syntactic lexicon entries (signal ... substituting).
% Category tags used here are v, vp, ved, ven, ving and n; presumably base verb,
% present, past, past participle, -ing form and noun -- confirm against the grammar.
% Scan damage repaired: functor must be immediately followed by "(", every clause
% ends in a single ".", and split or misread atoms are rejoined.
synw(signal, vp).
% NOTE(review): the source lists signaled/ved twice; both clauses are kept.
synw(signaled, ved).
synw(signaled, ved).
synw(signaled, ven).
synw(signaling, n).
synw(signaling, ving).
synw(signals, vp).
synw(split, n).
synw(split, v).
synw(split, ved).
synw(split, ven).
synw(split, vp).
synw(splits, vp).
synw(splitting, n).
synw(splitting, ving).
synw(stimulate, v).
synw(stimulate, vp).
synw(stimulated, ved).
synw(stimulated, ven).
synw(stimulates, vp).
synw(stimulating, n).
synw(stimulating, ving).
synw(stimulation, n).
synw(substitute, v).
synw(substitute, vp).
synw(substituted, ved).
synw(substituted, ven).
synw(substitutes, vp).
synw(substituting, n).
synw(substituting, ving).
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synw (substitution, n) . 
% synw(Word, Category): syntactic lexicon entries (suppress ... urging), followed
% by the first entry of the complex-related verbs.
% Scan damage repaired: stray spaces inside atoms and after the functor, doubled
% ". ." terminators, and a clause that ended in "," instead of ".".
synw(suppress, vp).
synw(suppress, v).
synw(suppressed, ved).
synw(suppressed, ven).
synw(suppresses, vp).
synw(suppressing, n).
synw(suppressing, ving).
synw(suppression, n).
synw(tie, n).
synw(tie, v).
synw(tie, vp).
synw(tied, ved).
synw(tied, ven).
synw(ties, vp).
synw(transcribe, v).
synw(transcribe, vp).
synw(transcribed, ved).
synw(transcribed, ven).
synw(transcribes, vp).
synw(transcribing, n).
synw(transcribing, ving).
synw(transcription, n).
synw(tying, n).
synw(tying, ving).
synw(ubiquitinization, n).
synw(ubiquitinize, v).
synw(ubiquitinize, vp).
synw(ubiquitinized, ved).
synw(ubiquitinized, ven).
synw(ubiquitinizes, vp).
synw(ubiquitinizing, n).
synw(ubiquitinizing, ving).
synw(urge, n).
synw(urge, v).
synw(urge, vp).
synw(urged, ved).
synw(urged, ven).
synw(urges, vp).
synw(urging, n).
synw(urging, ving).

% the following are verbs connected with complexes
synw(form, v).
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synw ( form, vp) . 
% synw(Word, Category): syntactic entries for verbs connected with complexes
% (form, assemble, dissassemble, dissociate, recruit).
% Scan damage repaired: spacing after the functor, a stray "*" inside an atom,
% and a clause that ended in "," instead of ".".
synw(forms, vp).
synw(formed, ved).
synw(formed, ven).
synw(forming, n).
synw(formation, n).
synw(assemble, v).
synw(assemble, vp).
synw(assembles, vp).
synw(assembled, ved).
synw(assembled, ven).
synw(assembling, n).
synw(assembly, n).
% NOTE(review): these entries are spelled "dissassemble" (double s) in the source,
% while correctly spelled disassemble entries appear earlier in the lexicon.
% The spelling is kept as found.
synw(dissassemble, v).
synw(dissassemble, vp).
synw(dissassembles, vp).
synw(dissassembled, ved).
synw(dissassembled, ven).
synw(dissassembling, n).
synw(dissassembly, n).
synw(dissociate, v).
synw(dissociate, vp).
synw(dissociates, vp).
synw(dissociated, ved).
synw(dissociated, ven).
synw(dissociating, n).
synw(dissociation, n).
synw(recruit, v).
synw(recruit, vp).
synw(recruits, vp).
synw(recruited, ved).
synw(recruited, ven).
synw(recruiting, n).
synw(recruitment, n).
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% lexsemacc .pat 

% revised March 17, 2000

% SEMANTIC LEXICON OF ACTIONS

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% For genomics - the grammar tests for semantic and syntactic categories
% separately for action type of categories; for substances the lexical
% entries are the same as in the medical area

% action type phrases have two entries: a semantic entry and a syntactic entry

% This lexicon contains the semantic entries for words and phrases

% semp is a lexical entry for phrasal lexicon
% semp(+Word1, +Sem, +Wordlist, +Targetform, +Features)
% semp specifies a semantic lexical definition for the genomics literature
% semp is equivalent to the predicate "phrase" in the medical area
% semp: Word1 is first word of phrase, Sem is semantic category
% semp: Wordlist is list of words in phrase, Targetform is output form
% semp: Features is a list of 2 elements or the atom "def" representing default
% semp: Features 1st element is rev or nrev meaning reversed or not reversed
% semp: Features 2nd element is a # specifying number of arguments for action
% semp: Features = def is equivalent to a list = [nrev, 2]
%       in case action has 1 argument, use [1|_]
% NOTE(review): the scan reads "[l/_]" for the one-argument form; "[1|_]" is a
% guess. The clauses in this file write Features as [def], [2, rev] or [1],
% not as the bare atom def or [nrev, 2] described above. Confirm which form
% the grammar expects before normalizing either side.

% semw is a lexical entry for single word
% semw(+Word, +Sem, +Targetform, +Features)
% semw: the arguments are the same as for semp except there is no Wordlist

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% Both predicates are declared multifile so other lexicon files can add clauses.
:- multifile(semp/5).
:- multifile(semw/4).

semp(account, cause, [account, for], cause, [def]).
semp(accounted, cause, [accounted, for], cause, [def]).
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semp (accounting, cause, [accounting, for] , cause, [def ] ) . 

% semp(Word1, Sem, Wordlist, Targetform, Features): phrasal semantic entries
% (accounts for ... dissociation from).
% Scan damage repaired: "semp<", "(" or ")" in place of list brackets, clauses
% ending in "," or "-" or with no terminator, and spacing after the functor.
semp(accounts, cause, [accounts, for], cause, [def]).
semp(add, attach, [add, up], attach, [def]).
semp(added, attach, [added, up], attach, [def]).
semp(adds, attach, [adds, up], attach, [def]).
semp(are, cause, [are, a, means, of, producing], cause, [def]).
semp(are, cause, [are, due, to], cause, [2, rev]).
semp(as, cause, [as, a, result, of], cause, [2, rev]).
semp(attributable, cause, [attributable, to], cause, [2, rev]).
semp(attributed, cause, [attributed, to], cause, [2, rev]).
semp(based, cause, [based, on], cause, [2, rev]).
semp(based, cause, [based, upon], cause, [2, rev]).
semp(because, cause, [because, of], cause, [2, rev]).
% NOTE(review): the source has [conveys, a, signal] under the head word "convey".
% Every other entry here starts its word list with the head word, and a separate
% "conveys" entry follows, so the list is corrected to start with "convey".
semp(convey, signal, [convey, a, signal], signal, [def]).
semp(conveyed, signal, [conveyed, a, signal], signal, [def]).
semp(conveying, signal, [conveying, a, signal], signal, [def]).
semp(conveys, signal, [conveys, a, signal], signal, [def]).
semp(dissociate, release, [dissociate, from], release, [def]).
semp(dissociated, release, [dissociated, from], release, [def]).
semp(dissociates, release, [dissociates, from], release, [def]).
semp(dissociation, release, [dissociation, from], release, [def]).



% semp(Word1, Sem, Wordlist, Targetform, Features): phrasal semantic entries
% (down-regulate ... functions as a negative regulator of).
% The scan split these clauses into column fragments (clause heads, "[def])."
% tails and the trailing comments landed on separate lines). Each clause is
% reassembled here onto one line with its comment.
semp(down, signal, [down, '-', regulate], signal, [def]).     % A down-regulates B   A B
semp(down, signal, [down, '-', regulated], signal, [def]).    % A down-regulates B   A B
semp(down, signal, [down, '-', regulates], signal, [def]).    % A down-regulates B   A B
semp(down, signal, [down, '-', regulation], signal, [def]).   % A down-regulates B   A B
semp(due, cause, [due, to, the, fact, that], cause, [2, rev]).
semp(due, cause, [due, to], cause, [2, rev]).
semp(form, attach, [form, complex], attach, [def]).
semp(formation, attach, [formation, of, complex], attach, [def]).
semp(formed, attach, [formed, complex], attach, [def]).
semp(forms, attach, [forms, complex], attach, [def]).
semp(had, cause, [had, an, active, role, in], cause, [def]).
semp(has, cause, [has, an, active, role, in], cause, [def]).
semp(have, cause, [have, an, active, role, in], cause, [def]).
semp(is, cause, [is, a, means, of, producing], cause, [def]).
semp(is, cause, [is, due, to], cause, [2, rev]).
semp(functions, inactivate, [functions, as, a, negative, regulator, of], inactivate, [def]).

semp (function, inactivate, (function, as, a, negative, regulator, of] , ina 



% A down 



% A down 



% A dow 



[def] ) 
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ctlvate , (def ] ) . 

% semp(Word1, Sem, Wordlist, Targetform, Features): phrasal semantic entries
% (lead to ... prolyl-4-hydroxylates).
% Scan damage repaired: "sempdead" and "serap" heads, brackets read as
% "(", ")", "{" or "}", quote marks read as "*" or a bullet, spaces injected
% inside quoted atoms, "createlDohd", and '-' tokens dropped from word lists.
semp(lead, cause, [lead, to], cause, [def]).
% NOTE(review): the scan reads "causel" in the two cause1 clauses below. It is
% restored as cause1 on the assumption of the same 1 -> l misread seen in
% "Wordl"; confirm the category name against the grammar.
semp(lead, cause1, [lead, to], cause, [def]).
semp(leading, cause, [leading, to], cause, [def]).
semp(leading, cause, [leading, to], cause, [def]).
semp(leads, cause, [leads, to], cause, [def]).
semp(leads, cause1, [leads, to], cause, [def]).
semp(led, cause, [led, to], cause, [def]).
semp(may, cause, [may, be, responsible, for], cause, [def]).
semp(mediate, signal, [mediate, a, signal], signal, [def]).         % A mediates a signal to B
semp(mediated, signal, [mediated, a, signal], signal, [def]).       % A mediates a signal to B
semp(mediates, signal, [mediates, a, signal], signal, [def]).       % A mediates a signal to B
semp(mediation, signal, [mediation, of, a, signal], signal, [def]). % A mediates a signal to B.
semp(n, createbond, [n, '-', acetylate], 'N-acetylate', [def]).
semp(n, createbond, [n, '-', acetylated], 'N-acetylate', [def]).
semp(n, createbond, [n, '-', acetylates], 'N-acetylate', [def]).
semp(n, createbond, [n, '-', acetylation], 'N-acetylate', [def]).
semp(n, createbond, [n, '-', acylate], 'N-acylate', [def]).
semp(n, createbond, [n, '-', acylated], 'N-acylate', [def]).
semp(n, createbond, [n, '-', acylates], 'N-acylate', [def]).
semp(n, createbond, [n, '-', acylation], 'N-acylate', [def]).
semp(n, createbond, [n, '-', glycosylate], 'N-glycosylate', [def]).
semp(n, createbond, [n, '-', glycosylated], 'N-glycosylate', [def]).
semp(n, createbond, [n, '-', glycosylates], 'N-glycosylate', [def]).
semp(n, createbond, [n, '-', glycosylation], 'N-glycosylate', [def]).
% NOTE(review): the word list below has no '-' token in the scan, although the
% target form is hyphenated; kept as scanned. Verify whether '-' was dropped.
semp(n, breakbond, [n, terminal, proteolysis], 'n-terminal proteolysis', [def]).
semp(o, createbond, [o, '-', glycosylate], 'O-glycosylate', [def]).
semp(o, createbond, [o, '-', glycosylated], 'O-glycosylate', [def]).
semp(o, createbond, [o, '-', glycosylates], 'O-glycosylate', [def]).
semp(o, createbond, [o, '-', glycosylation], 'O-glycosylate', [def]).
semp(only, time, [only, after], 'only after', [2, rev]).
semp(prolyl, createbond, [prolyl, '-', 4, '-', hydroxylate], 'prolyl-4-hydroxylate', [def]).
semp(prolyl, createbond, [prolyl, '-', 4, '-', hydroxylated], 'prolyl-4-hydroxylate', [def]).
semp(prolyl, createbond, [prolyl, '-', 4, '-', hydroxylates], 'prolyl-4-hydroxylate', [def]).
semp (prolyl , createbond, [prolyl , ' - ' . 4 , • - ' , hydroxy lat ion] , 
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' prolyl -4 -hydroxylate ' , [def] ) . 
% semp(Word1, Sem, Wordlist, Targetform, Features): phrasal semantic entries
% (result from ... were due to), followed by the first semw/4 single-word
% entries (acetylate ... activates).
% The scan split the long suppress/switch/up-regulate clauses into fragments
% scattered over later lines; each clause is reassembled here. Also repaired:
% ")" or "}" read for "]", "Idef", "[def 1", and "," or "-" terminators.
semp(result, cause, [result, from], cause, [2, rev]).
semp(result, cause, [result, in], cause, [def]).
semp(resulted, cause, [resulted, from], cause, [2, rev]).
semp(resulted, cause, [resulted, in], cause, [def]).
semp(resulting, cause, [resulting, from], cause, [2, rev]).
semp(resulting, cause, [resulting, in], cause, [def]).
semp(results, cause, [results, from], cause, [2, rev]).
semp(results, cause, [results, in], cause, [def]).
semp(set, release, [set, free], release, [def]).
semp(set, release, [set, free], release, [def]).
semp(sets, release, [sets, free], release, [def]).
semp(setting, release, [setting, free], release, [def]).
semp(suppress, inactivate, [suppress, activity, of], inactivate, [def]).
semp(suppressed, inactivate, [suppressed, activity, of], inactivate, [def]).
semp(suppresses, inactivate, [suppresses, activity, of], inactivate, [def]).
semp(suppression, inactivate, [suppression, of, activity, of], inactivate, [def]).
semp(switch, activate, [switch, on, the, activity, of], activate, [def]).
semp(switched, activate, [switched, on, the, activity, of], activate, [def]).
semp(switches, activate, [switches, on, the, activity, of], activate, [def]).
% NOTE(review): only the "regulates" clause shows the '-' token in the scan.
% It is restored in the other three to agree with that clause and with the
% down-regulate entries; confirm against the original listing.
semp(up, signal, [up, '-', regulate], signal, [2, rev]).      % A up-regulates B   B A
semp(up, signal, [up, '-', regulated], signal, [2, rev]).
semp(up, signal, [up, '-', regulates], signal, [2, rev]).
semp(up, signal, [up, '-', regulation], signal, [2, rev]).
semp(was, cause, [was, a, means, of, producing], cause, [def]).
semp(was, cause, [was, due, to], cause, [2, rev]).
semp(were, cause, [were, a, means, of, producing], cause, [def]).
semp(were, cause, [were, due, to], cause, [2, rev]).
semw(acetylate, createbond, acetylate, [def]).
semw(acetylated, createbond, acetylate, [def]).
semw(acetylates, createbond, acetylate, [def]).
semw(acetylation, createbond, acetylate, [def]).
semw(activate, activate, activate, [def]).
semw(activated, activate, activate, [def]).
semw(activates, activate, activate, [def]).
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semw( activation, activate, activate, [def] ) . 
% semw(Word, Sem, Targetform, Features): single-word semantic entries
% (add ... causes).
% Scan damage repaired: "acici" for add, a euro sign inside "associate",
% "brea)cbond" and "brealdDond" for breakbond, "(def", "{def}" and "tdef }"
% for [def], and stray characters after clause terminators.
semw(add, attach, attach, [def]).
semw(added, attach, attach, [def]).
semw(addition, attach, attach, [def]).
semw(adds, attach, attach, [def]).
semw(after, time, after, [2, rev]).      % temporal relations
semw(aggregate, attach, attach, [def]).
semw(aggregated, attach, attach, [def]).
semw(aggregates, attach, attach, [def]).
semw(aggregation, attach, attach, [def]).
semw(arrest, inactivate, inactivate, [def]).
semw(arrested, inactivate, inactivate, [def]).
semw(arrests, inactivate, inactivate, [def]).
semw(associate, attach, attach, [def]).
semw(associated, attach, attach, [def]).
semw(associates, attach, attach, [def]).
semw(association, attach, attach, [def]).
semw(attach, attach, attach, [def]).
semw(attached, attach, attach, [def]).
semw(attaches, attach, attach, [def]).
semw(attachment, attach, attach, [def]).
semw(bind, attach, attach, [def]).
semw(binding, attach, attach, [def]).
semw(binds, attach, attach, [def]).
semw(block, inactivate, inactivate, [def]).
semw(blocked, inactivate, inactivate, [def]).
semw(blocking, inactivate, inactivate, [def]).
semw(blocks, inactivate, inactivate, [def]).
semw(bound, attach, attach, [def]).
semw(break, breakbond, 'break bond', [def]).
semw(breakage, breakbond, 'break bond', [def]).
semw(breaks, breakbond, 'break bond', [def]).
semw(broke, breakbond, 'break bond', [def]).
semw(broken, breakbond, 'break bond', [def]).    % case without break bond
semw(catalyzation, promote, catalyze, [def]).
semw(catalyze, promote, catalyze, [def]).
semw(catalyzed, promote, catalyze, [def]).
semw(catalyzes, promote, catalyze, [def]).
semw(catalyzing, promote, catalyze, [def]).
semw(cause, cause, cause, [def]).
semw(caused, cause, cause, [def]).
semw(causes, cause, cause, [def]).
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c 1 eavage , breakbond , 
cleave, breakbond, ' 
cleaved, breakbond, 
cleaves, breakbond, 
coimmunoprecipitate , 
coimmunoprecipitated 



'break bond' , [def ] ) . 
break bond' , [def ] ) . 
'break bond' , [def] ) . 
'break bond' . [def] ) . 
attach, attach, [def] ) . 
, attach, attach, [def] ) . 
% semw(Word, Sem, Targetform, Features): single-word semantic entries
% (coimmunoprecipitates ... death).
% The scan dropped the leading "semw(" from every clause on this page. It is
% restored here; the neighbouring pages continue the same alphabetical semw/4
% list. Also repaired: atoms split by spaces, "[def 1 >", "[defl", "(defl)",
% "." read for "," and clauses ending in ",".
semw(coimmunoprecipitates, attach, attach, [def]).
semw(coimmunoprecipitation, attach, attach, [def]).
semw(combination, attach, attach, [def]).
semw(combine, attach, attach, [def]).
semw(combined, attach, attach, [def]).
semw(combines, attach, attach, [def]).
semw(conjugate, attach, attach, [def]).
semw(conjugated, attach, attach, [def]).
semw(conjugates, attach, attach, [def]).
semw(conjugation, attach, attach, [def]).
semw(connect, attach, attach, [def]).
semw(connected, attach, attach, [def]).
semw(connection, attach, attach, [def]).
semw(connects, attach, attach, [def]).
semw(constrain, inactivate, inactivate, [def]).
semw(constrained, inactivate, inactivate, [def]).
semw(constrains, inactivate, inactivate, [def]).
semw(constraint, inactivate, inactivate, [def]).
semw(coprecipitate, attach, attach, [def]).
semw(coprecipitated, attach, attach, [def]).
semw(coprecipitates, attach, attach, [def]).
semw(coprecipitation, attach, attach, [def]).
semw(copurification, attach, attach, [def]).
semw(copurified, attach, attach, [def]).
semw(copurifies, attach, attach, [def]).
semw(copurify, attach, attach, [def]).
semw(couple, attach, attach, [def]).
semw(coupled, attach, attach, [def]).
semw(couples, attach, attach, [def]).
semw(cut, breakbond, 'break bond', [def]).    % leave breakbond onl... (comment truncated in the scan)
semw(cuts, breakbond, 'break bond', [def]).
semw(deactivate, inactivate, inactivate, [def]).
semw(deactivated, inactivate, inactivate, [def]).
semw(deactivates, inactivate, inactivate, [def]).
semw(deactivation, inactivate, inactivate, [def]).
semw(death, process, death, [1]).
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(demethylate, breakbond, demethylate , [def ) ) . 
% semw(Word, Sem, Targetform, Features): single-word semantic entries
% (demethylated ... generation).
% The scan dropped the functor name "semw" in front of each "(" on this page.
% It is restored here; the neighbouring pages continue the same alphabetical
% semw/4 list. Also repaired: "{" for "(", "[Ij" for [1], "(def) )" for [def],
% an unterminated quoted atom, and clauses ending in ",".
semw(demethylated, breakbond, demethylate, [def]).
semw(demethylates, breakbond, demethylate, [def]).
semw(demethylation, breakbond, demethylate, [def]).
semw(dephosphorylate, breakbond, dephosphorylate, [def]).
semw(dephosphorylated, breakbond, dephosphorylate, [def]).
semw(dephosphorylates, breakbond, dephosphorylate, [def]).
semw(dephosphorylation, breakbond, dephosphorylate, [def]).
semw(die, process, death, [1]).
semw(died, process, death, [1]).
semw(dies, process, death, [1]).
semw(disassemble, release, release, [def]).
semw(disassembled, release, release, [def]).
semw(disassembles, release, release, [def]).
semw(disassembly, release, release, [def]).
semw(discharge, release, release, [def]).
semw(discharged, release, release, [def]).
semw(discharges, release, release, [def]).
semw(disengage, release, release, [def]).
semw(disengaged, release, release, [def]).
semw(disengagement, release, release, [def]).
semw(disengages, release, release, [def]).
semw(divide, breakbond, 'break bond', [def]).
semw(divided, breakbond, 'break bond', [def]).
semw(divides, breakbond, 'break bond', [def]).
semw(division, breakbond, 'break bond', [def]).
semw(dying, process, death, [1]).
semw(enhance, promote, promote, [def]).
semw(enhanced, promote, promote, [def]).
semw(enhancement, promote, promote, [def]).
semw(enhances, promote, promote, [def]).
semw(enhancing, promote, promote, [def]).
semw(express, generate, express, [def]).      % can have either 1 or 2 arguments
semw(expressed, generate, express, [def]).
semw(expresses, generate, express, [def]).
semw(expressing, generate, express, [def]).
semw(expression, generate, express, [def]).
semw(generate, generate, generate, [def]).
semw(generated, generate, generate, [def]).
semw(generates, generate, generate, [def]).
semw(generating, generate, generate, [def]).
semw(generation, generate, generate, [def]).
% Semantic lexicon entries, hew .. joining (OCR damage repaired).
semw(hew, breakbond, 'break bond', [def]).
semw(hewed, breakbond, 'break bond', [def]).
semw(hews, breakbond, 'break bond', [def]).
semw(hinder, inactivate, inactivate, [def]).
semw(hindered, inactivate, inactivate, [def]).
semw(hinders, inactivate, inactivate, [def]).
semw(hindrance, inactivate, inactivate, [def]).
semw(inactivate, inactivate, inactivate, [def]).
semw(inactivated, inactivate, inactivate, [def]).
semw(inactivates, inactivate, inactivate, [def]).
semw(inactivation, inactivate, inactivate, [def]).
semw(incite, activate, activate, [def]).
semw(incited, activate, activate, [def]).
semw(incitement, activate, activate, [def]).
semw(incites, activate, activate, [def]).
semw(induce, activate, activate, [def]).
semw(induced, activate, activate, [def]).
semw(induces, activate, activate, [def]).
semw(induction, activate, activate, [def]).
semw(influence, activate, activate, [def]).
semw(influenced, activate, activate, [def]).
semw(influences, activate, activate, [def]).
semw(influencing, activate, activate, [def]).
semw(inhibit, inactivate, inactivate, [def]).
semw(inhibited, inactivate, inactivate, [def]).
semw(inhibition, inactivate, inactivate, [def]).
semw(inhibits, inactivate, inactivate, [def]).
semw(initiate, activate, activate, [def]).
semw(initiated, activate, activate, [def]).
semw(initiates, activate, activate, [def]).
semw(initiation, activate, activate, [def]).
semw(instigate, activate, activate, [def]).
semw(instigated, activate, activate, [def]).
semw(instigates, activate, activate, [def]).
semw(instigation, activate, activate, [def]).
semw(interact, interact, interact, [def]).
semw(interacted, interact, interact, [def]).
semw(interaction, interact, interact, [def]).
semw(interactions, interact, interact, [def]).
% NOTE(review): class is 'react' here but 'interact' on the four entries
% above; kept as printed -- confirm which class is intended.
semw(interacts, react, interact, [def]).
semw(join, attach, attach, [def]).
semw(joined, attach, attach, [def]).
semw(joining, attach, attach, [def]).
% Semantic lexicon entries, joins .. phosphorylated ("semw(" prefix restored).
semw(joins, attach, attach, [def]).
semw(juncture, attach, attach, [def]).
semw(liberate, release, release, [def]).
semw(liberated, release, release, [def]).
semw(liberates, release, release, [def]).
semw(liberation, release, release, [def]).
semw(limit, inactivate, inactivate, [def]).
semw(limitation, inactivate, inactivate, [def]).
semw(limited, inactivate, inactivate, [def]).
semw(limits, inactivate, inactivate, [def]).
semw(link, attach, attach, [def]).
semw(linked, attach, attach, [def]).
semw(linking, attach, attach, [def]).
semw(links, attach, attach, [def]).
semw(mediate, promote, promote, [def]).
semw(mediated, promote, promote, [def]).
semw(mediates, promote, promote, [def]).
semw(mediation, promote, promote, [def]).
semw(methylate, createbond, methylate, [def]).
semw(methylated, createbond, methylate, [def]).
semw(methylates, createbond, methylate, [def]).
semw(methylation, createbond, methylate, [def]).
semw(modification, modify, modify, [def]).
semw(modified, modify, modify, [def]).
semw(modifies, modify, modify, [def]).
semw(modify, modify, modify, [def]).
semw(modifying, modify, modify, [def]).
semw(mutate, modify, mutate, [1]).
semw(mutated, modify, mutate, [1]).
semw(mutates, modify, mutate, [1]).
semw(mutating, modify, mutate, [1]).
semw(mutation, modify, mutate, [1]).
semw(overexpressed, generate, overexpress, [def]).
semw(overexpresses, generate, overexpress, [def]).
semw(overexpressing, generate, overexpress, [def]).
% NOTE(review): target is 'express' here but 'overexpress' on the sibling
% entries; kept as printed -- confirm.
semw(overexpress, generate, express, [def]).
semw(overexpression, generate, overexpress, [def]).
semw(pair, attach, attach, [def]).
semw(paired, attach, attach, [def]).
semw(pairing, attach, attach, [def]).
semw(pairs, attach, attach, [def]).
semw(phosphorylate, createbond, phosphorylate, [def]).
semw(phosphorylated, createbond, phosphorylate, [def]).
% Semantic lexicon entries, phosphorylates .. requires.
% The scan printed the "semw" functors in a separate column; they are
% re-joined with their argument lists here.
semw(phosphorylates, createbond, phosphorylate, [def]).
semw(phosphorylation, createbond, phosphorylate, [def]).
semw(precede, cause, cause, [def]).
semw(preceded, cause, cause, [def]).
semw(precedes, cause, cause, [def]).
semw(preceding, cause, cause, [def]).
semw(promote, promote, promote, [def]).
semw(promoted, promote, promote, [def]).
semw(promotes, promote, promote, [def]).
semw(promotion, promote, promote, [def]).
semw(prompt, activate, activate, [def]).
semw(prompted, activate, activate, [def]).
semw(prompting, activate, activate, [def]).
semw(prompts, activate, activate, [def]).
semw(react, react, react, [def]).
semw(reacted, react, react, [def]).
semw(reaction, react, react, [def]).
semw(reactions, react, react, [def]).
semw(reacts, react, react, [def]).
semw(regulate, signal, signal, [def]).
semw(regulated, signal, signal, [def]).
% NOTE(review): the scan carries the comment fragments "B is regulated by",
% "A i" and ", ~-> B" near this entry; probably
% "B is regulated by A, i.e. A --> B" -- wording not fully legible.
semw(regulates, signal, signal, [def]).
semw(regulation, signal, signal, [def]).
semw(release, release, release, [def]).
semw(released, release, release, [def]).
semw(releases, release, release, [def]).
semw(removal, breakbond, 'break bond', [def]).
semw(remove, breakbond, 'break bond', [def]).
% NOTE(review): printed twice as "remove"; second occurrence taken to be
% "removed" (the only form otherwise missing from the series).
semw(removed, breakbond, 'break bond', [def]).
semw(removes, breakbond, 'break bond', [def]).
semw(replace, substitute, substitute, [def]).
semw(replaced, substitute, substitute, [def]).
semw(replacement, substitute, substitute, [def]).
semw(replaces, substitute, substitute, [def]).
semw(repress, inactivate, inactivate, [def]).
semw(repressed, inactivate, inactivate, [def]).
semw(represses, inactivate, inactivate, [def]).
semw(repression, inactivate, inactivate, [def]).
semw(require, cause, cause, [2, rev]).
semw(required, cause, cause, [2, rev]).
semw(requirement, cause, cause, [2, rev]).
semw(requires, cause, cause, [2, rev]).
% Semantic lexicon entries, requiring .. transcribing.
% The scan printed the "semw" functors in a separate column; they are
% re-joined with their argument lists here.
semw(requiring, cause, cause, [2, rev]).
semw(restrain, inactivate, inactivate, [def]).
semw(restrained, inactivate, inactivate, [def]).
semw(restrains, inactivate, inactivate, [def]).
semw(restraint, inactivate, inactivate, [def]).
semw(sensitization, activate, activate, [def]).
semw(sensitize, activate, activate, [def]).
semw(sensitized, activate, activate, [def]).
semw(sensitizes, activate, activate, [def]).
semw(separate, breakbond, 'break bond', [def]).
semw(separated, breakbond, 'break bond', [def]).
semw(separates, breakbond, 'break bond', [def]).
semw(separation, breakbond, 'break bond', [def]).
semw(sever, breakbond, 'break bond', [def]).
semw(severance, breakbond, 'break bond', [def]).
semw(severed, breakbond, 'break bond', [def]).
semw(severs, breakbond, 'break bond', [def]).
semw(signal, signal, signal, [def]).
semw(signaled, signal, signal, [def]).
semw(signaling, signal, signal, [def]).
semw(signals, signal, signal, [def]).
semw(split, breakbond, 'break bond', [def]).
semw(splits, breakbond, 'break bond', [def]).
semw(splitting, breakbond, 'break bond', [def]).
semw(stimulate, activate, activate, [def]).
semw(stimulated, activate, activate, [def]).
semw(stimulates, activate, activate, [def]).
semw(stimulation, activate, activate, [def]).
semw(substitute, substitute, substitute, [def]).
semw(substituted, substitute, substitute, [def]).
semw(substitutes, substitute, substitute, [def]).
semw(substitution, substitute, substitute, [def]).
semw(suppress, inactivate, inactivate, [def]).
semw(suppressed, inactivate, inactivate, [def]).
semw(suppresses, inactivate, inactivate, [def]).
semw(suppression, inactivate, inactivate, [def]).
semw(tie, attach, attach, [def]).
semw(tied, attach, attach, [def]).
semw(ties, attach, attach, [def]).
semw(transcribe, generate, transcribe, [def]).
semw(transcribed, generate, transcribe, [def]).
semw(transcribes, generate, transcribe, [def]).
semw(transcribing, generate, transcribe, [def]).
% Semantic lexicon entries, transcription .. recruitment ("semw(" restored).
semw(transcription, generate, transcribe, [def]).
% NOTE(review): "ubiquitinize" and "urge" are each printed twice in the scan;
% one of each pair is probably a different word form that OCR collapsed.
% Kept as printed (duplicate facts are harmless).
semw(ubiquitinize, createbond, ubiquitinize, [def]).
semw(ubiquitinize, createbond, ubiquitinize, [def]).
semw(ubiquitinized, createbond, ubiquitinize, [def]).
semw(ubiquitinizes, createbond, ubiquitinize, [def]).
semw(urge, activate, activate, [def]).
semw(urge, activate, activate, [def]).
semw(urged, activate, activate, [def]).
semw(urges, activate, activate, [def]).
semw(urging, activate, activate, [def]).
semw(form, attach, attach, [def]).
semw(forms, attach, attach, [def]).
semw(formed, attach, attach, [def]).
semw(forming, attach, attach, [def]).
semw(formation, attach, attach, [def]).
semw(assemble, attach, attach, [def]).
semw(assembles, attach, attach, [def]).
semw(assembled, attach, attach, [def]).
semw(assembling, attach, attach, [def]).
semw(assembly, attach, attach, [def]).
% Spelling "dissassembl-" is as printed in the source.
semw(dissassemble, release, release, [def]).
semw(dissassembles, release, release, [def]).
semw(dissassembled, release, release, [def]).
semw(dissassembling, release, release, [def]).
semw(dissassembly, release, release, [def]).
semw(dissociate, release, release, [def]).
semw(dissociates, release, release, [def]).
semw(dissociated, release, release, [def]).
semw(dissociating, release, release, [def]).
semw(dissociation, release, release, [def]).
semw(recruit, attach, attach, [def]).
semw(recruits, attach, attach, [def]).
semw(recruited, attach, attach, [def]).
semw(recruiting, attach, attach, [def]).
semw(recruitment, attach, attach, [def]).
% edited Genome grammar - adapted from MedLEE's grammar for use with MedLEE
% this is to be used along with the genomics lexicon of substances, actions,
% and relations.

% revised March 16, April 5, 2000
% adjusted for tagged input
% wdef/3 and phrase/5 have clauses spread over several files.
:- multifile(wdef/3).
:- multifile(phrase/5).

%%%%%%%%%%%%%%%%%%%% Semantic Grammar for Genomics %%%%%%%%%%%%%%%%%%%%%%%%%%%%
%                                                                             %
%   Written by Carol Friedman for the MedLEE System                           %
%   Queens College of the City University of New York                         %
%                                                                             %
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Highest Level Predicate - sem_sent - 1st arg. is target structure           %
%                                    - 2nd arg. is a list of words in sentence%
%                                    - 3rd arg. is '[]'                       %
% Target structure: a frame or set of connected frames:                       %
%   the frame describes an action or several related actions;                 %
%   an action frame is a list consisting of the symbol 'action'               %
%   followed by the code for the action and arguments.                        %
%   The arguments are either substances or actions;                           %
%   each substance slot consists of the name of the type of                   %
%   substance followed by the value for the substance;                        %
%   the substance slot may contain slots for several substances.              %
% Examples:                                                                   %
% Blocking of Il-2 gene transcription by activated rap1.                      %
%   [action, inactivate, [protein, Rap1, [state, active]],                    %
%      [action, transcribe, [x], [gene, interleukin-2]]]                      %
%                                                                             %
% The adapter protein crkl was associated with both phosphorylated cbl and the%
% guanidine nucleotide-releasing factor c3g.                                  %
%   [action, attach, [protein, CrkL],                                         %
%      [relation, and, [protein, Cbl, [state, phosphorylated]],               %
%         [protein, guanidine nucleotide-releasing factor C3G,                %
%            [state, phosphorylated]]]]                                       %
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% fail an unknown predicate
% NOTE(review): scan reads ":- unknown fail)"; restored as the two-argument
% unknown(Old, New) form -- confirm against the target Prolog system.
:- unknown(_, fail).

:- op(900, fy, [not, once]).   % same priority and type as \+
% NOTE(review): the operator name is missing from the scan.  '\=' is assumed
% because the grammar uses it as an infix operator throughout -- confirm.
:- op(700, xfx, \=).           % same priority and type as = or ==

% snoop is generally used to find the input string when using a DCG;
% the input string is used for constraints.
% Called as the nonterminal snoop(S0, S0) it unifies S0 with the remaining
% input and consumes nothing.
snoop(A, B, A, B).

% sem_sent(-P, +Semlist, -X): top-level nonterminal.
% Asserts the counter fact addstotal(0), then parses.
sem_sent(P, Semlist, X) -->
    { assert(addstotal(0)) },
    sem_parse(P, Semlist, X).
% NOTE(review): this two-argument rule leaves Target unbound and is not called
% by sem_sent//3; it may have been commented out in the original listing.
sem_parse(Target, Semlist) -->
    sem_patterns(P, Semlist).
% Parse the patterns, then check for the end mark.
sem_parse(Target, Semlist, X) -->
    sem_patterns(P, Semlist),
    sem_endornot(P, Target, X).

% Fallback in expanded (non-DCG) form: Target is [failure] and X is the
% stored addstotal count.
% NOTE(review): the ":-" was lost in the scan and is restored here.
sem_parse([failure], _, X, _, _) :-
    addstotal(X).

sem_endornot(P, P, X) -->            % P is target if there is an endmark
    sem_endmark,
    { addstotal(X) }.                % X is number of times reached endmark
% NOTE(review): the argument lists of the next two clauses are missing from
% the scan ("sem_endornot :-" and "sem_endornot [failure]"); they are rebuilt
% in expanded five-argument form by analogy with sem_parse([failure],_,X,_,_).
% "uptptal" is read as uptotal.  Confirm both.
sem_endornot(_, _, _, _, _) :-       % did not reach endmark; update count and fail
    uptotal, fail.
sem_endornot(_, [failure], X, _, _) :-
    addstotal(X),                    % X is number of times reached
    X >= 50.



% Finding patterns 



% sem_patterns(-F, +Semlist): one pattern followed by optional connected
% patterns; getrelation/4 combines them into F.
sem_patterns(F, Semlist) -->
    pattern(F1, Semlist),
    { F1 \= [] },                    % 1st finding should not be empty
    morepattern(R, F2, Semlist),     % connected patterns
    { getrelation(R, F1, F2, F) }.



% The action pattern types are: pattern, nounactionpatt, actpatt, and
% nounactpatt.
%   pattern --> actionarg(A1)
%               active or passive verb
%               actionarg(A2).
%   pattern --> nounactionpatt.
%   pattern --> actpatt.

% pattern is saved in a symbol table (st); check for success/failure 1st
% Case where pattern is in st and has been successful
pattern(Fmt, _) --> checkst(pattern, _, s, Fmt).
% Case where pattern is in st as a failure.
pattern(_, _) --> checkst(pattern, _, f, _), {!, fail}.

% pattern 5: an action pattern with a nominal verb
%   Ps1 cleavage by zvad.
%   apoptosis-induced cleavage of PS2 by zDEVD.
pattern(F, Semlist) -->
    snoop(S0, S0),                   % S0 is the input string
    { \+ checkst(pattern, 5, _, _, S0, _),
      actionchk(Semlist) },
    nounactionpatt(F),
    snoop(S, S),
    { addst(pattern, 5, s, F, S0, S) }.



% pattern 1: an action/substance acts on an action/substance
%   the activation of rap1 inhibits the expression of il-2
%   rap1 functions as a negative regulator of tcr-mediated il-2 gene
%   transcription.
pattern(F, Semlist) -->
    snoop(S0, S0),                   % S0 is the input string
    { \+ checkst(pattern, 1, _, _, S0, _),
      actionchk(Semlist),
      connectchk(Semlist) },
    actionarg(A1),
    connectact(Sem, [v, vp, ved], Target, Features),
    actionarg(A2),
    snoop(S, S),                     % ending sentence list
    { ( member(def, Features),
        modlist([A1, A2, Site], Mods)
      ; member(rev, Features),       % 'rev' swaps the argument order
        modlist([A2, A1, Site], Mods) ),
      frame(F, action, Target, Mods),
      addst(pattern, 1, s, F, S0, S)
    }.

% pattern 2: an action/substance was acted on by an action/substance
%   The aggregation of bad was suppressed.
%   The aggregation of bad was suppressed by the phosphorylation of jnk.
%   Grb2 was associated with Cbl.
%   Apoptosis-associated cleavage of endogenous PS1 was blocked by the
%   treatment with zVAD.
pattern(F, Semlist) -->
    snoop(S0, S0),                   % S0 is the input string
    { \+ checkst(pattern, 2, _, _, S0, _),
      actionchk(Semlist),
      connectchk(Semlist) },
    actionarg(A2),
    sem_beterm(_),                   % was
    connectact(Sem, [ven], Target, Features),   % activated
    optbyarg(A1),
    snoop(S, S),                     % ending sentence list
    { ( member(def, Features),
        modlist([A1, A2, Site], Mods)
      ; member(rev, Features),
        modlist([A2, A1, Site], Mods) ),
      frame(F, action, Target, Mods),
      addst(pattern, 2, s, F, S0, S)
    }.

% pattern 3: an action/substance acted on an action/substance
%   bad induced phosphorylation of fyn.
%   tcr and cd28-mediated il-2 transcription.
pattern(F, Semlist) -->
    snoop(S0, S0),
    { \+ checkst(pattern, 3, _, _, S0, _),
      actionchk(Semlist),
      connectchk(Semlist) },
    actionarg(A1),                   % substance or basic action
    % optdash,
    connectacts(Sem, [vp, ven, ved], Target, Features),   % 'activated'
    % optof,
    actionarg(A2),                   % had pattern here
    snoop(S, S),
    { ( member(def, Features),
        modlist([A1, A2, Site], Mods)
      ; member(rev, Features),
        modlist([A2, A1, Site], Mods) ),
      frame(F, action, Target, Mods),
      addst(pattern, 3, s, F, S0, S)
    }.
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% pattern 4: a simple action pattern with an active verb. 
% Activated Raf-1 phosphorylates MEK-X, 
% Simple action pattern: delegates to actpatt//1 and records the result.
pattern(F, Semlist) -->
    snoop(S0, S0),
    % check that sentence has an action word/phrase
    { \+ checkst(pattern, 4, _, _, S0, _),
      actionchk(Semlist) },
    actpatt(F),
    snoop(S, S),
    { addst(pattern, 4, s, F, S0, S) }.

% no more patterns - save failure
pattern(_, _) --> addst(pattern, 0, f, _), {!, fail}.

% sem_morepattern(-Rel, -P, +Semlist, +S0, +S):
%   Rel is a relation and its value frame;
%   P is the remaining patterns, Semlist is the list of semantic classes
%   in sentence
% if have a series of ','s, use the relation "and" or "or" if in the nest
% and make that the relation
morepattern(R, F, Semlist) -->
    sem_relation(R1, Mod1),          % relation and modifiers
    sem_patterns(F, Semlist),
    { ( frame(F, rel, Conj2, _),     % F contains nested relation
        ( Conj2 = and ; Conj2 = or ),
        frame(R1, rel, ',', _),      % R1 relation frame
        frame(R, rel, Conj2, _)      % value of relation is Conj2
      ;
        R1 \= [],                    % where do Type, Value and Mods2 come from?
        frame(R1, Type, Value, Mod2),   % get components of original relation
        mergemods(Mod1, Mod2, Mods),
        ( Mods = [], frame(R, rel, Value, []), !
        ; %frame(R, rel, [Value|Mods], [])  % make it rel connector with rel mod
          R = [rel, [Value|Mods]]
        )
      )
    }.

% no more findings
morepattern([], [], _, S, S).

% actionarg is the argument of pattern
% actionarg is either a substance or a basic action
% actionarg is saved in a symbol table (st); check for success/failure 1st
% Case where actionarg is in st and has been successful
actionarg(A) --> checkst(actionarg, _, s, A).

% Case where actionarg is in st as a failure.
actionarg(_) --> checkst(actionarg, _, f, _), {!, fail}.

% actionarg 1: a substance or substances
%   Rap1, active Rap1, Cbl and Crkl
actionarg(A) -->
    snoop(S0, S0),                   % S0 is the input string
    { \+ checkst(actionarg, 1, _, _, S0, _) },
    substances(A),
    snoop(S, S),
    { addst(actionarg, 1, s, A, S0, S) }.



% [page break in the scanned listing: page 4, WO 00/63687, PCT/US00/10302]

% actionarg 2: a process like apoptosis, or a disease
actionarg(A) -->
    snoop(S0, S0),                   % S0 is the input string
    { \+ checkst(actionarg, 2, _, _, S0, _) },
    processpatt(A),
    snoop(S, S),
    { addst(actionarg, 2, s, A, S0, S) }.

% actionarg 3: a nominal action pattern
%   Etoposide-induced apoptosis.
%   Etoposide-induced PS1 cleavage by zVAD.
actionarg(A) -->
    snoop(S0, S0),                   % S0 is the input string
    { \+ checkst(actionarg, 3, _, _, S0, _) },
    nounactionpatt(A),
    snoop(S, S),
    { addst(actionarg, 3, s, A, S0, S) }.

% actionarg 4: the object of the nominal action is an actionarg
%   Blocking of IL-2 Gene transcription by activated rap1.
actionarg(A) -->
    snoop(S0, S0),                   % S0 is the input string
    { \+ checkst(actionarg, 4, _, _, S0, _) },
    action(Sem, [n, ving], Target, Features),
    [of],
    actionarg(A1),
    optbyagent(A2),
    snoop(S, S),
    { ( member(def, Features),
        modlist([A1, A2], Mods)
      ; member(rev, Features),
        modlist([A2, A1], Mods) ),
      frame(A, action, Target, Mods),
      addst(actionarg, 4, s, A, S0, S)
    }.



% no more actionarg - save failure
actionarg(_) --> addst(actionarg, 0, f, _), {!, fail}.

% nounactionpatt is a nominal action pattern which allows for left and right
% modifiers
%   Il-2 gene transcription mediated by tcr and cd28 was inhibited by rap1.
%   Activated rap1 functions as a negative regulator of tcr and cd28-mediated
%   il-2 transcription.

% nounactionpatt is saved in a symbol table (st); check for success/failure 1st
% Case where nounactionpatt is in st and has been successful
nounactionpatt(A) --> checkst(nounactionpatt, _, s, A).

% Case where nounactionpatt is in st as a failure.
nounactionpatt(_) --> checkst(nounactionpatt, _, f, _), {!, fail}.

nounactionpatt(P) -->
    snoop(S0, S0),                   % S0 is the input string
    { \+ checkst(nounactionpatt, 1, _, _, S0, _) },
    actionlmod(L, Syn1),
    nounactionunit(A),
    actionrmod(R, Syn2),
    snoop(S, S),
    { ( Syn1 = ved, append(R, [A], RA),
        append(L, RA, P)
      ; Syn1 = ving, append(R, [A], RA),
        L = [action, Verb, Object],
        modlist(RA, Object, Mods),
        frame(P, action, Verb, Mods) ),
      % NOTE(review): the scan shows addst(nounactionpatt,1,P,S0,S); the status
      % argument 's' is restored to match every other addst/6 call.
      addst(nounactionpatt, 1, s, P, S0, S) }.

% no more nounactionpatt - save failure
nounactionpatt(_) --> addst(nounactionpatt, 0, f, _), {!, fail}.

% the central unit of the nounactionpatt is a nounactpatt or a process
nounactionunit(A) --> nounactpatt(A).
nounactionunit(A) --> process(A).

% left modifiers of nounactpatt
%   Zvad-inhibited cleavage of Ps1
actionlmod(L, ved) -->
    substances(S),
    optdash,
    action(Sem, [ved], Target, Features),
    { frame(L, action, Target, [S]) }.

%   apoptosis induced cleavage of ps2
actionlmod(L, ved) -->
    process(S),
    optdash,
    action(Sem, [ved], Target, Features),
    { frame(L, action, Target, [S]) }.

%   apoptosis causing cleavage of Ps1 by Zvad.
% need to invert the order of nounactpatt and actionlmod
actionlmod(L, ving) -->
    processobject(A),                % process or nounactpatt
    action(Sem, [ving], Target, Features),
    { frame(L, action, Target, A) }.

% no left modifier
actionlmod([], []) --> [].

% right modifier of a nominal action: past-tense action followed by a by-agent
actionrmod(R, ved) -->
    action(Sem, [ved], Target, Features),
    byagent(A),                      % may have to add ving to actionrmod
    { frame(R, action, Sem, A) }.
% no right modifier
% NOTE(review): scan reads "actionrmod {[] t)"; rebuilt to mirror
% actionlmod([],[]) --> [].
actionrmod([], []) --> [].



%
% actpatt parses a simple action between substances expressed by an active verb
%
% actpatt is saved in a symbol table (st); check for success/failure 1st
% Case where actpatt is in st and has been successful
actpatt(F) --> checkst(actpatt, _, s, F).

% Case where actpatt is in st as a failure.
actpatt(_) --> checkst(actpatt, _, f, _), {!, fail}.

% actpatt 1: substance acts on substance
%   PDK1 phosphorylates p70s6k at Thr229
actpatt(F) -->
    snoop(S0, S0),                   % S0 is the input string
    { \+ checkst(actpatt, 1, _, _, S0, _) },
    substances(A1),
    sem_whichrel,                    % opt 'that'
    action(Semclass, [vp, ved], Target, Features),
    prepopt,     % added prepopt to allow action 'to' and 'with' substance
    substances(A2),
    siteinfo(Site),
    snoop(S, S),
    { ( member(def, Features),
        modlist([A1, A2, Site], Mods)
      ; member(rev, Features),
        modlist([A2, A1, Site], Mods) ),
      frame(F, action, Target, Mods),
      addst(actpatt, 1, s, F, S0, S)
    }.

% actpatt 2:
%   Substance was bound by Substance
%   Substance was associated to substance.
% F can give either first or second place to the second argument;
% a byagent gets first position; prepagent gets second.
%   Phosphorylated Fyn was associated with Cbl.
actpatt(F) -->
    snoop(S0, S0),                   % S0 is the input string
    { \+ checkst(actpatt, 2, _, _, S0, _) },
    substances(A1),
    sem_beterm(_),
    action(Semclass, [ven], Target, Features),
    optbyorprepagent(Position, A2),
    snoop(S, S),
    { ( member(def, Features),
        ( Position = second, modlist([A1, A2, Site], Mods)
        ; Position = first,  modlist([A2, A1, Site], Mods) )
      ; member(rev, Features),
        ( Position = second, modlist([A2, A1, Site], Mods)
        ; Position = first,  modlist([A1, A2, Site], Mods) ) ),
      frame(F, action, Target, Mods),
      addst(actpatt, 2, s, F, S0, S)
    }.

% no more actpatt - save failure
actpatt(_) --> addst(actpatt, 0, f, _), {!, fail}.

%
% nounactpatt parses a simple action between substances expressed by a nominal
% verb
%
% nounactpatt is saved in a symbol table (st); check for success/failure 1st
% Case where nounactpatt is in st and has been successful
nounactpatt(Fmt) --> checkst(nounactpatt, _, s, Fmt).
% Case where nounactpatt is in st as a failure.
nounactpatt(_) --> checkst(nounactpatt, _, f, _), {!, fail}.

% nounactpatt 1:
%   Jnk phosphorylation of Bad
nounactpatt(F) -->
    snoop(S0, S0),                   % S0 is the input string
    { \+ checkst(nounactpatt, 1, _, _, S0, _) },
    substances(A1),
    { aminoacidtest(A1) },           % A1 must not be an aminoacid frame
    optdash,
    action(Semclass, [n], Target, Features),
    ofobject(A2),
    % siteinfo(Site),
    snoop(S, S),
    { ( member(def, Features),
        modlist([A1, A2, Site], Mods)
      ; member(rev, Features),
        modlist([A2, A1, Site], Mods) ),
      frame(F, action, Target, Mods),
      addst(nounactpatt, 1, s, F, S0, S)
    }.

% nounactpatt 2: the binding of substance and substance
%   association of Fyn and Cbl.
% the reason for having this as a separate pattern is to
% prevent 'Fyn and Cbl' from being parsed together as substances
nounactpatt(F) -->
    snoop(S0, S0),                   % S0 is the input string
    { \+ checkst(nounactpatt, 2, _, _, S0, _) },
    action(attach, [ving, n], Target, Features),
    % NOTE(review): scan reads "ofobjectl"; taken as ofobject1 (OCR prints
    % the digit 1 as 'l' elsewhere) -- could also be ofobject. Confirm.
    ofobject1(A1),
    andobject(A2),
    % siteinfo(Site),
    snoop(S, S),
    { modlist([A1, A2, Site], Mods),
      frame(F, action, Target, Mods),
      % NOTE(review): scan shows addst(nounactpatt,2,S0,S); the 's' and F
      % arguments are restored to match the other addst/6 calls.
      addst(nounactpatt, 2, s, F, S0, S)
    }.

% nounactpatt 3:
%   The cleavage of protein by substance.
%   Association of phosphorylated Fyn with Cbl
%   Tyrosine phosphorylation of Cbl by kinase
% optbyorprepagent determines the order of arguments; byagent is placed first;
% prepagent is placed second
nounactpatt(F) -->
    snoop(S0, S0),                   % S0 is the input string
    { \+ checkst(nounactpatt, 3, _, _, S0, _) },
    actionof(F),
    snoop(S, S),
    { addst(nounactpatt, 3, s, F, S0, S) }.

% actionof(-F): [site] nominal-action [of object] [by/prep agent]
actionof(F) -->
    siteinfo(Site),
    action(Semclass, [ving, n], Target, Features),
    optofobject(A1),
    optbyorprepagent(Position, A2),
    snoop(S, S),
    { ( member(def, Features),
        ( Position = second, modlist([A1, A2, Site], Mods)
        ; Position = first,  modlist([A2, A1, Site], Mods) )
      ; member(rev, Features),
        ( Position = second, modlist([A2, A1, Site], Mods)
        ; Position = first,  modlist([A1, A2, Site], Mods) ) ),
      frame(F, action, Target, Mods)
    }.

% nounactpatt 4:
%   Fyn association with Cbl.
nounactpatt(F) -->
    snoop(S0, S0),                   % S0 is the input string
    { \+ checkst(nounactpatt, 4, _, _, S0, _) },
    substances(A1),
    action(Semclass, [ving, n], Target, Features),
    withobject(A2),
    % siteinfo(Site),
    snoop(S, S),
    { modlist([A1, A2, Site], Mods),
      frame(F, action, Target, Mods),
      addst(nounactpatt, 4, s, F, S0, S)
    }.

% aminoacidtest(+X): succeeds unless X is a list whose head is 'aminoacid'.
aminoacidtest(X) :- X \= [aminoacid|_].

% nounactpatt 5:
%   IL-2 gene transcription
%   Cbl phosphorylation [by substance or action]
% (head variable printed as P in the scan; the body builds F)
nounactpatt(F) -->
    snoop(S0, S0),                   % S0 is the input string
    { \+ checkst(nounactpatt, 5, _, _, S0, _) },
    substances(A2),
    optdash,
    action(Semclass, [n], Target, Features),
    optbyagent(A1),
    % siteinfo(Site),
    snoop(S, S),
    { ( member(def, Features),
        modlist([A1, A2, Site], Mods)
      ; member(rev, Features),
        modlist([A2, A1, Site], Mods) ),
      frame(F, action, Target, Mods),
      addst(nounactpatt, 5, s, F, S0, S)
    }.

% nounactpatt 6:
%   fyn-cbl association.
% (head variable printed as P in the scan; the body builds F)
nounactpatt(F) -->
    snoop(S0, S0),                   % S0 is the input string
    { \+ checkst(nounactpatt, 6, _, _, S0, _) },
    substances(A1),
    optdash,
    substances(A2),
    action(Semclass, [n, ving], Target, Features),
    % siteinfo(Site),
    snoop(S, S),
    { modlist([A1, A2, Site], Mods),
      frame(F, action, Target, Mods),
      addst(nounactpatt, 6, s, F, S0, S)
    }.
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% nounactpatt 7 : 

% Cbl phosphorylated by fyn. 

% substance + past participle + 'by' + substance; the by-agent (A2) is
% placed first in the modifier list.
nounactpatt(F) -->
    snoop(S0, S0),                   % S0 is the input string
    { \+ checkst(nounactpatt, 7, _, _, S0, _) },
    substances(A1),
    action(Semclass, [ven], Target, Features),
    [by],
    substances(A2),
    % siteinfo(Site),
    snoop(S, S),
    % { (member(def, Features),
    { modlist([A2, A1, Site], Mods),
    %   member(rev, Features),
    %   modlist([A1, A2, Site], Mods)),
      frame(F, action, Target, Mods),
      addst(nounactpatt, 7, s, F, S0, S)
    }.

% no more nounactpatt - save failure
nounactpatt(_) --> addst(nounactpatt, 0, f, _), {!, fail}.



% connectact: an action whose semantic class is one of the connective classes
% listed below.
connectact(Sem, Syn, Target, Features) -->
    action(Sem, Syn, Target, Features),
    { member(Sem, [cause, cause1, activate, inactivate, signal,
                   substitute, promote]) }.

% connectacts: currently the same as a single connectact.
connectacts(Sem, Syn, Target, Features) -->
    connectact(Sem, Syn, Target, Features).

% aminoacid like tyrosine: ex.: tyrosine Cbl phosphorylation
% at position 201 Thr
siteinfo(S) -->
    aminoacid(A),
    { frame(S, site, [A], []) }.
siteinfo(S) -->
    sitepreps,                       % 'in', 'at'
    position(S).
siteinfo([]) --> [].                 % site information is optional

sitepreps --> prepterm(in, _).
sitepreps --> prepterm(at, _).

position(S) -->
    [position],
    sem_integerterm(I),
    { frame(S, site, I, []) }.



% The definitions of actions refer to the lexicons lexsynact.pl and lexsemact.pl
% Sem is the semantic class; Syn is the syntactic class
% F is the target

% oneaction was added for use with moreaction to allow parsing of conjoined
% actions
% (The scan printed clause heads and bodies in separate columns; they are
% re-joined here in order.)
oneaction(activate, Syn, F, Features)   --> activateterm(Syn, F, Features), {!}.
oneaction(attach, Syn, F, Features)     --> attachterm(Syn, F, Features), {!}.
oneaction(breakbond, Syn, F, Features)  --> breakbondterm(Syn, F, Features), {!}.
oneaction(createbond, Syn, F, Features) --> createbondterm(Syn, F, Features), {!}.
oneaction(inactivate, Syn, F, Features) --> inactivateterm(Syn, F, Features), {!}.
oneaction(react, Syn, F, Features)      --> reactterm(Syn, F, Features), {!}.
oneaction(release, Syn, F, Features)    --> releaseterm(Syn, F, Features), {!}.
oneaction(signal, Syn, F, Features)     --> signalterm(Syn, F, Features), {!}.
oneaction(substitute, Syn, F, Features) --> substituteterm(Syn, F, Features), {!}.
oneaction(transcribe, Syn, F, Features) --> transcribeterm(Syn, F, Features), {!}.
oneaction(promote, Syn, F, Features)    --> promoteterm(Syn, F, Features), {!}.
oneaction(generate, Syn, F, Features)   --> generateterm(Syn, F, Features), {!}.
oneaction(cause, Syn, F, Features)      --> causeterm(Syn, F, Features), {!}.



action (activate, Syn, F, Features) activateterm (Syn, Al, Features) , 

moreaction (Conj , Args) , 
{Conj = [] ,F *A1; 

Conj\=:(] , mergemods {[ faction, Al] ] , Args, Actions) , 
frame (Fl, relation, Conj , Actions) , F = (Fll } • 
action (attach, Syn, F, Features) attachterm(Syn, Al ^Features). 

moreaction (Conj , Args) , 
(Conj * I] ,F =A1; 

Conj\= [] , mergemods ( [ (action, Al] ] , Args , Actions) , 
frame (Fl, relation, Conj , Actions) , F = CFl] ) . 
action (breakbond, Syn, F, Features) - - > breakbondterm (Syn, F, Features) , 

moreaction (Conj , Args) , 
{Conj = tl ,F =A1; 

Conj \« C 3 , mergemods ( C (action, Al 3 ] , Args . Actions) , 
frame (PI, relation, Conj .Actions) , F« [FlJ } • 
action (createbond, Syn, F, Features) --> createbondterm (Syn, F, Features) , 

moreaction (Conj , Args) , 
{Conj = [] ,F =:A1; 

Conj\=[] , mergemods ( [ [action. All ] , Args , Actions) , 
frame (Fl, relation, Conj , Actions) , F = [Fl] } • 
action (inactivate, Syn, F, Features) -~> inactivateterm (Syn, F, Features) , 

moreaction (Conj , Args) , 
{Conj s [] , F =A1; 

Conj\=* U , mergemods ( [ (action, Al) ] , Args. Actions) , 
frame (Fl, relation, Conj .Actions) , P - [Fl] } . 
action (react, Syn, P, Features) reactterm (Syn, F, Features) , 

moreaction (Conj , Args) , 
{Conj « [] ,F *A1; 

Conj\=[3 , mergemods ( [ [action, Al] ] , Args, Actions) , 
frame (Fl, relation, Conj .Actions) , F* EPl) } . 
action (release, Syn, P, Features) releaseterm (Syn, F, Features) , 

moreaction (Conj , Args) , 
{Conj = [] .F -Al; 

Conj\* [] , mergemods ( ( [action, Al] ] , Args, Actions) , 
frame (Fi, relation, Conj .Actions) , P » [Fl]}. 
action (signal, Syn, P, Features) signalterm (Syn, F, Features) , 

moreac t ion (Conj, Args ) , 
{Conj = [] , F =rAl; 

Conj\=[] , mergemods ([ [action, Al] 1 , Args, Actions) , 
frame (Fl, relation, Conj .Actions) , F= [Fl] } - 
action (substitute, Syn, F, Features) substituteterm (Syn, F, Features) , 

moreac t ion ( Conj , Args ) , 
{Conj ^ [) ,F.=A1; 

Conj\-[], mergemods ([ [action, Al] 1 , Args, Actions) , 
frame (Fl, relation, Conj .Actions) , F = [Fl] } . 
action (transcribe, Syn, F, Features) --> transcribeterm (Syn, F, Features) , 
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moreaction (Conj , Args) , 
{Conj = [) ,F =A1; 

Conj\» [] # mergemods ( [ [action, Al] ] .Args, Actions) , 
frame (Fl, relation, Conj .Actions) , F = tFl] } . 
% action//4 clauses for the promote, generate and cause classes.  Each calls
% its lexical nonterminal, then moreaction//2; with no conjunction F = A1,
% otherwise the merged actions are wrapped in a relation frame (F = [F1]).
% NOTE(review): A1 is not bound by anything visible in these clauses.
action(promote, Syn, F, Features) --> promoteterm(Syn, F, Features),
    moreaction(Conj, Args),
    {Conj = [], F = A1;
     Conj \= [], mergemods([[action, A1]], Args, Actions),
     frame(F1, relation, Conj, Actions), F = [F1]}.
action(generate, Syn, F, Features) --> generateterm(Syn, F, Features),
    moreaction(Conj, Args),
    {Conj = [], F = A1;
     Conj \= [], mergemods([[action, A1]], Args, Actions),
     frame(F1, relation, Conj, Actions), F = [F1]}.
action(cause, Syn, F, Features) --> causeterm(Syn, F, Features),
    moreaction(Conj, Args),
    {Conj = [], F = A1;
     Conj \= [], mergemods([[action, A1]], Args, Actions),
     frame(F1, relation, Conj, Actions), F = [F1]}.

% binds, phosphorylates and activates 
% moreaction(Conj, Args): parses "<conjunction> <action> ..." repeatedly.
% Conj is the conjunction to report; Args the list of [action, A] items.
%   - last conjunct (Conj2 = []): Conj is this conjunction, Args = [[action, A]]
%   - otherwise the later conjunction Conj2 is reported and [action, A] is
%     added to the remaining list with addmod/3.
moreaction(Conj, Args) --> sem_conjrest(Conj1),
    oneaction(Sem, Syn, A, Features),
    moreaction(Conj2, Alist),
    {Conj2 = [], Alist = [], Conj = Conj1, Args = [[action, A]];
     Conj2 \= [], Conj = Conj2,
     addmod([action, A], Alist, Args)}.

% No further conjoined action: consumes nothing.
moreaction([], [], S, S).



% passiveconnect(Sem, [ven], Target, Features): a form of "be" (sem_beterm)
% followed by a connecting action in past-participle form.
passiveconnect(Sem, [ven], Target, Features) -->
    sem_beterm(_),
    connectact(Sem, [ven], Target, Features).



% processpatt(A): a process pattern is either a disease or a process.
processpatt(A) --> disease(A).
processpatt(A) --> process(A).



% optbyorprepagent(Which, A): optional agent; Which records whether it was
% found as a by-agent (first) or as a prepositional agent (second).
optbyorprepagent(first, A) --> byagent(A).
optbyorprepagent(second, A) --> prepagent(A).
% NOTE(review): the scan shows "A = x" here but "[x]" in the other optional
% rules -- confirm which is intended.
optbyorprepagent(first, A) --> [], {A = x}.

% byorprepagent(Which, A): as above, but the agent is mandatory.
byorprepagent(first, A) --> byagent(A).
byorprepagent(second, A) --> prepagent(A).

% optbyagent(A): optional by-agent; A = [x] when absent.
optbyagent(A) --> byagent(A).
optbyagent(A) --> [], {A = [x]}.

% byagent(A): the word "by" followed by substances or by a noun-action
% pattern.
byagent(A) --> [by],
    substances(A).
byagent(A) --> [by],
    nounactionpatt(A).

% prepagent(A): agent given as a with-, to- or of-object.
prepagent(A) --> withobject(A).
prepagent(A) --> toobject(A).
% prepagent(A) --> andobject(A).
prepagent(A) --> ofobject(A).
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% optprepagent (A) 
%     --> byagent(A).        (body of the commented-out clause above)
% optprepagent(A): optional prepositional agent; A = [x] when absent.
% NOTE(review): heads and bodies were printed in separate columns in the
% scanned listing; they are paired here in order of appearance.
optprepagent(A) --> ofobject(A).
optprepagent(A) --> withobject(A).
optprepagent(A) --> toobject(A).
optprepagent(A) --> andobject(A).
optprepagent(A) --> [], {A = [x]}.



% ofobject(A): the word "of" followed by a noun-action pattern, substances,
% or an actionof phrase.
ofobject(A) --> [of],
    nounactionpatt(A).
ofobject(A) --> [of],
    substances(A).
ofobject(A) --> [of],
    actionof(A).
% NOTE(review): the name reads "ofobjecti" in the scan; taken to be
% ofobject1 -- confirm.
ofobject1(A) --> [of], substance(A).

% optofobject(A): optional of-object; [x] when absent.
optofobject(A) --> ofobject(A).
optofobject([x]) --> [].



% to parse Binding of Fyn and Bad. 



processobject(A) --> process(A).   % can be expanded to nounactpatt, etc.

% optwithobject (A) withobject (A) . 

% optwithobject (A) [] , (A = Cx] } . 



% Prepositional objects: a fixed word followed by substances//1.
% NOTE(review): heads and bodies were printed in separate columns in the
% scanned listing; they are paired here in order of appearance.
withobject(A) --> [with], substances(A).
toobject(A)   --> [to], substances(A).
andobject(A)  --> [and], substances(A).
prepobject(A) --> [to], substances(A).
prepobject(A) --> [with], substances(A).



% optbyarg(A): optional argument, either "by" + actionarg or bare substances.
% When nothing matches, A = ['substance unknown'].
optbyarg(A) --> [by],
    actionarg(A).
optbyarg(A) --> substances(A).
optbyarg(A) --> [], {A = ['substance unknown']}.

% prepopt: optionally consumes one of "to", "with", "by", "of".
prepopt --> [to].
prepopt --> [with].
prepopt --> [by].
prepopt --> [of].
prepopt --> [].

% toopt: optional "to".
toopt --> [to].
toopt --> [].

% withopt: optional "with".
withopt --> [with].
withopt --> [].

% optdash: optional '-'.
optdash --> ['-'].
optdash --> [].

% optof: optional "of".
optof --> [of].
optof --> [].

/* optactionarg (A) --> actionarg (A) . 

optactionarg( (] ) --> (] . */ 

% optactionarg(A): an action argument, when one is present.
optactionarg(A) -->
    actionarg(A).
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% there is no further curgument 
% No further argument: consumes nothing and yields A = [].
optactionarg(A) -->
    {A = []}.

% substances (F) --> substance (F) . 

% substances (F) substance (PI) , 

% moresubstances {Conj , Plist) , 

% { Conj a CI . Plist « (1 . F s PI ; 

% Conj \* (1 , 

% mergemods (PI, Plist, Args) , 

% f r ame (F, relation, Con j,Args) 

» }• 

% substances (F) substanceswithmods (F) . 

% substances (A) 

% proteins (A) . 

V subswithmods , txt 

% substances is saved in a symbol table (st) ; 
% check for success/failure 1st 

% substances(F): memoised rule for a (possibly conjoined) substance phrase
% with left and right modifiers.  Results are cached in the symbol table
% through checkst/addst.
% NOTE(review): the id argument of the two DCG checkst calls is missing in
% the scan.  1 is restored for success (it matches the explicit 6-argument
% calls below) and 0 for failure (it matches the failure-saving clause) --
% confirm against the master copy.

% Case where substances is in st and has been successful
substances(Fmt) --> checkst(substances, 1, s, Fmt).
% Case where substance is in st as a failure
substances(_) --> checkst(substances, 0, f, _), {!, fail}.

substances(F) -->
    snoop(S0, S0),
    {\+ checkst(substances, 1, s, _, S0, _)},
    lmods(Lmods),                      % left modifiers
    (severalsubstances([relation, Conj, First | Rest]),   % conjoined substances
     rmods(Rmods),                     % right modifiers
     % create list of lists containing distributed mods of substances
     {distributesubs(Dist, [First | Rest], Lmods, Rmods),
      % check Lmods - "no" F1 or F2 should be changed to no F1 and no F2
      fixconj(Lmods, [rel, Conj], [rel, C2]),
      %splice([Conj, Dist], F)
      frame(F, relation, C2, Dist)};
     % substances and modifiers without conjunction
     substance(D1),
     rmods(Rmods),
     {D1 = [Type1, Substance1 | ModsD1],
      delete(ModsD1, [], ModsD2),
      append([Lmods, Rmods], ModsD2, Allmods1),
      delete(Allmods1, [], Allmods2),
      frame(F, Type1, Substance1, Allmods2)}),
    snoop(S, S),
    % record the successful parse for the span S0..S
    {addst(substances, 1, s, F, S0, S)}.

/* substances(F) --> snoop(SO , SO) , 

{\+ checkst (substances, 3 ,s,_,SO,_)}, 
complex (F) , 
{addst (substances, 3, s, F, SO, S) } . 

*/ 

% no more substances - save failure
% (records the failure in the symbol table, then cuts and fails)
substances(_) --> addst(substances, 0, f, _), {!, fail}.
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severalsubstances (F) --> substance (Pi) , 

moresubstances (Conj , Plist) , 
{ Conj = a , Plist = CJ , F =: PI ; 
Conj \= [1 , 

addmod ( PI , Pi is t , Args ) , 
frame (F, relation, Conj ,Args) 

}. 

% 'X, Y, and Z'
% moresubstances(Conj, Args): the conjoined tail of a substance list.
% NOTE(review): the atom compared in "Conj2 \= ..." is illegible in the scan;
% ',' is assumed -- confirm.
moresubstances(Conj, Args) --> sem_conjrest(Conj1),
    substance(P1),
    moresubstances(Conj2, Plist),
    {Conj2 = [], Plist = [], Conj = Conj1, Args = [P1];
     Conj2 \= [], Conj2 \= ',', Conj = Conj2,
     addmod(P1, Plist, Args)
    }.

% to allow for substances with modifiers
moresubstances(Conj1, Args) --> sem_conjrest(Conj1),
    substances(Args), {!}.

moresubstances([], []) --> [].   % no conjunction



% distributesubs 

% distributes left mods and right mods over list of findings creating 
% list of lists of findings with mods 
% distributesubs(-Dist, +Subs, +Lmods, +Rmods)
% For every [Type, Substance | Mods] in Subs, builds a frame whose modifier
% list is [Lmods, Rmods] followed by Mods, with empty-list entries deleted.
% Dist is the list of those frames in the order of Subs.
distributesubs([], [], _, _) :- !.
distributesubs(Dist, [D1|Tail], Lmods, Rmods) :-
    distributesubs(Dist2, Tail, Lmods, Rmods),   % distributed for remainder
    D1 = [Type1, Substance1 | ModsD1],
    append([Lmods, Rmods], ModsD1, Allmods1),
    delete(Allmods1, [], Allmods2),
    frame(D, Type1, Substance1, Allmods2),
    append([D], Dist2, Dist).   % Combine findings to get list of findings

% lmods(A): left modifier.  A state term yields a state frame; a measure is
% consumed but yields []; with no modifier A = [].
lmods(A) --> stateterm(F),
    {frame(A, state, F, [])}.
lmods([]) --> sem_measure(_).
lmods([]) --> [].
% rmods(A): right modifiers -- only the empty case is defined here.
rmods([]) --> [].

% stateterm(F): a word of lexical category "state".
stateterm(F) --> acclex(state, F).

% for past participle of createbond and breakbond actions, the target
% is the word.  ex.: phosphorylated, dephosphorylated, methylated
% NOTE(review): in the scanned listing the argument lists of createbondterm /
% breakbondterm and the right-hand side of "S0 = ..." are illegible.  They
% are reconstructed from the adjacent comments ("get the first word of the
% string") -- confirm against the master copy.
stateterm(F) -->
    snoop(S0, S0),                  % get the initial string
    createbondterm([ven], _, _),
    {S0 = [F|_]}.                   % get the first word of the string
stateterm(F) -->
    snoop(S0, S0),                  % get the initial string
    breakbondterm([ven], _, _),
    {S0 = [F|_]}.                   % get the first word of the string

% may have to add attachterm for 'bound' 
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% Taken from MedLEE grammar co handle '3 cm' 
% sem_measure(M): quantity, optional dash and unit (e.g. '3 cm'); M is a
% measure frame holding [N, Unit].
sem_measure(M) -->
    sem_premeasure,
    sem_quantityterm(N),
    optdash,
    sem_measureterm(Unit),
    {frame(M, measure, [N, Unit], [])}.
% complex predicates added November 8, 1999 
% CrkL-C3G complex 
% ras : raf-i association 

% ras : raf-1 complexes * 
% shc-grb2-sos 
% TCR/CD3 complex 

% p/CAF-p/CIP-CBP/p300-SRC-l coit^lex 
% Ras:Raf-l complexes 
% complex(C): two or more connected proteins, optionally followed by a
% complex word; C is a complex frame over the protein list.
complex(C) --> proteins(P),
    {P = [A, B|_], A \= [], B \= []},
    optcomplexword,
    {frame(C, complex, [P], [])}.

% a complex of NFAT4 with calcineurin
complex(C) --> complexword,
    complexarg(A),
    {frame(C, complex, [A], [])}.

% complexarg(A): the proteins named after a complex word.
complexarg(A) --> [of], proteins(A).
complexarg(A) --> [between], proteins(A).
% a complex between MyD88, IRAK-2, and the IL-1Rs
complexarg(A) --> action(contain), proteins(A).
% Complexes containing BOB.1/OBF.1 and Oct proteins

% proteins(P): one protein followed by any connected proteins; P is the list.
% NOTE(review): the scan shows ";" between the two goals; "," is restored
% because with ";" P would stay unbound whenever A \= [] -- confirm.
proteins(P) --> protein(A),
    moreproteins(P1),
    {(A \= [], append([A], P1, P))}.



% moreproteins(A): connector followed by further proteins, or nothing.
% NOTE(review): heads and bodies were printed in separate columns in the
% scanned listing; they are paired here in order of appearance.  The body of
% moreproteins([]) and the arity of the "with" connector are uncertain.
moreproteins(A) --> proteinconnector,
    proteins(A).
moreproteins([]) --> [].

% proteinconnector: punctuation joining members of a complex.
proteinconnector --> ['-'].
proteinconnector --> ['/'].
proteinconnector --> [':'].
% connector --> [','].    taken out not to conflict with relation in
% connector --> [and].    moresubstances
proteinconnector(C) --> [with].

% optconnector: optional protein connector.
optconnector --> proteinconnector.
optconnector --> [].



% complexword: a word or phrase naming a complex.
complexword --> [complex].
complexword --> [complexes].
complexword --> ['signaling complexes'].

% optcomplexword: optional complex word.
% NOTE(review): only one body is legible in the scan; the empty alternative
% is restored for the second head -- confirm.
optcomplexword --> complexword.
optcomplexword --> [].



substance(A) --> protein(A).
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s\ibstance (A) 
svibstance (A) 
substance (A) 
substance (A) 
substance (A) 
substance (A) 
substance (A) 
substance (A) 
substance (A) 
substance (A) 
substance (A) 
substance (A) 



cell(A) , 

species (A) . 
--> structure (A) . 
--> domain (A) . 
--•> gene (A) . 

geneorprotein(A) 

aminoacid (A) , 

smallmolecule (A) 
- - > matter (A) . 

proteinsite (A) . 

disease (A) . 

complex < A) . 



% this will be modified later 



% Entity rules: each looks up a term of one lexical class and wraps the
% target form P in a frame of the corresponding type with no modifiers.
protein(A) -->
    proteinterm(P),
    {frame(A, protein, P, [])}.

complex(A) -->
    complexterm(P),
    {frame(A, complex, P, [])}.

cell(A) -->
    cellterm(P),
    {frame(A, cell, P, [])}.

species(A) -->
    speciesterm(P),
    {frame(A, species, P, [])}.

structure(A) -->
    structureterm(P),
    {frame(A, structure, P, [])}.

domain(A) -->
    domainterm(P),
    {frame(A, domain, P, [])}.

gene(A) -->
    geneterm(P),
    {frame(A, gene, P, [])}.

% geneorprotein(A): an ambiguous gene/protein term followed by one more word
% X.  X = gene or X = protein decides the frame type; any other word gives a
% geneorprotein frame (the word X is consumed in all three cases).
geneorprotein(A) -->
    gpterm(P),
    [X],
    {(X = gene, frame(A, gene, P, []);
      X = protein, frame(A, protein, P, []);
      X \= gene, X \= protein, frame(A, geneorprotein, P, []))}.

aminoacid(A) -->
    aminoacidterm(P),
    {frame(A, aminoacid, P, [])}.

smallmolecule(A) -->
    smallmoleculeterm(P),
    {frame(A, 'small molecule', P, [])}.



matter (A) - - > 
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mattertenn(P) , 

{ frame (A, substance , P , ( 1 ) } . 

% proteinsite(A) / disease(A): lexical term wrapped in a frame of that type.
proteinsite(A) -->
    proteinsiteterm(P),
    {frame(A, 'protein site', P, [])}.

disease(A) -->
    diseaseterm(P),
    {frame(A, disease, P, [])}.

% process(A): a process term, looked up either with syntactic features
% (processterm//3) or as a plain lexical entry (processterm//1).  Each clause
% commits with a cut once the frame is built.
process(A) -->
    processterm(Syn, F, Features),
    {frame(A, process, F, []), !}.
process(A) -->
    processterm(P),
    {frame(A, process, P, []), !}.



% terminals 
% Each *term nonterminal looks up the next word or phrase in one lexical
% category via acclex//2 and returns its target form F.
% NOTE(review): heads and bodies were printed in separate columns in the
% scanned listing; they are paired here in order of appearance.
proteinterm(F)       --> acclex(protein, F).
complexterm(F)       --> acclex(complex, F).
cellterm(F)          --> acclex(cell, F).
speciesterm(F)       --> acclex(species, F).
structureterm(F)     --> acclex(structure, F).
domainterm(F)        --> acclex(domain, F).
geneterm(F)          --> acclex(gene, F).
gpterm(F)            --> acclex(gp, F).
aminoacidterm(F)     --> acclex(aminoacid, F).
smallmoleculeterm(F) --> acclex(smallmolecule, F).
matterterm(F)        --> acclex(substance, F).
proteinsiteterm(F)   --> acclex(proteinsite, F).
diseaseterm(F)       --> acclex(disease, F).
processterm(F)       --> acclex(process, F).



% action(activate, Syn, F, Features) --> activateterm(Syn, F, Features).



% Action lexical lookups: each calls acclexss//4 with the semantic class of
% the action and passes Syn, F and Features through.
% NOTE(review): heads and bodies were printed in separate columns in the
% scanned listing; they are paired here in order of appearance.
activateterm(Syn, F, Features)   --> acclexss(activate, Syn, F, Features).
attachterm(Syn, F, Features)     --> acclexss(attach, Syn, F, Features).
breakbondterm(Syn, F, Features)  --> acclexss(breakbond, Syn, F, Features).
createbondterm(Syn, F, Features) --> acclexss(createbond, Syn, F, Features).
inactivateterm(Syn, F, Features) --> acclexss(inactivate, Syn, F, Features).
reactterm(Syn, F, Features)      --> acclexss(react, Syn, F, Features).
releaseterm(Syn, F, Features)    --> acclexss(release, Syn, F, Features).
signalterm(Syn, F, Features)     --> acclexss(signal, Syn, F, Features).
substituteterm(Syn, F, Features) --> acclexss(substitute, Syn, F, Features).
transcribeterm(Syn, F, Features) --> acclexss(transcribe, Syn, F, Features).
promoteterm(Syn, F, Features)    --> acclexss(promote, Syn, F, Features).
processterm(Syn, F, Features)    --> acclexss(process, Syn, F, Features).
generateterm(Syn, F, Features)   --> acclexss(generate, Syn, F, Features).
causeterm(Syn, F, Features)      --> acclexss(cause, Syn, F, Features).



% Semi is t contains a phrase which is an action 
% actionchk(+Semlist): calls intersect/2 on Semlist and the list of action
% classes below (the clause-terminating period was missing in the listing).
actionchk(Semlist) :-
    intersect(Semlist, [attach, cause, createbond, breakbond, activate,
                        inactivate, substitute, transcribe, express,
                        promote, signal]).



% Semlist contains a phrase which is a connector action 



18 



BNSDCXSia <WO_00e3e87A1J_> 



wo 00/63687 



PCT/USOO/10302 



% connectchk(+Semlist): calls intersect/2 on Semlist and the list of
% connector action classes below.
connectchk(Semlist) :-
    intersect(Semlist, [cause, activate, inactivate, substitute,
                        promote, signal]).



%%%*%%*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*%%%%% 
% Genome sectionc: ends here * 

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
% relations are connected by conjunctions, or 
% certain 'conn' prepositions, 

% Taken from MedLEE grammar to handle connectives that are conjunctions
% Ex: "severe markings, possibly from tuberculosis"
sem_relation(F, []) -->        % relation and modifiers
    sem_commapunc,
    sem_certainty([], C, rel),
    prepterm(P, conn),
    {frame(F, rel, P, C)}.
    %splice([[rel,P],C],R).

% Ex: "markings, swelling", "markings and swelling"
sem_relation(R, []) --> sem_conjrel(R),
    sem_commapunc.
% "density may represent known tumor" 

% "markings, and swelling"
% sem_conjrel(F): optional comma plus a conjunction, returned as a rel frame.
sem_conjrel(F) -->
    sem_commapunc,
    sem_conjterm(Conj),
    {frame(F, rel, Conj, [])}.

sem_conjrest(Conj) -->    % restricted conj, has not sem_relation_showopt
    sem_commapunc,
    sem_conjterm(Conj).
% "markings, swelling"
% The S0 \= S test makes this clause succeed only when sem_commapunc
% actually consumed a comma.
sem_conjrest(',') -->
    snoop(S0, S0),
    sem_commapunc,
    snoop(S, S),
    {S0 \= S}.

% Treatment of Verbs from MedLEE's Grammar
% form of "be"
sem_auxverb(B) --> sem_beterm(B).
% form of "do"
sem_auxverb(B) --> sem_doterm(B).
% form of "have"
sem_auxverb(B) --> sem_haveterm(B).

% sem_recrel: the preposition "in" or "to".
sem_recrel --> prepterm(in, _).
sem_recrel --> prepterm(to, _).

% "is not"
sem_auxrel(V) --> sem_auxverb(_),
    sem_negterm(V).
sem_auxrel(V) --> sem_auxverb(V).

% left modifiers of findings include negation, quantity, certainty, degree, and 
% change type modifiers 
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sem_integer (W) - - > iw) , { integer ( W) } . 
% An integer given as a lexicon word (integerterm//1).
sem_integer(W) --> integerterm(W).

% sem_timeunit(T): a time-unit word.
sem_timeunit(T) --> sem_timeunitterm(T).

% From MedLEE grammar - "lasting 2 days", "for 2 days", "times 2 days"
sem_duration(F) -->
    sem_durpreps,
    sem_premeasure,        % about
    sem_timemeasure(T),
    sem_durationmod,       % opt. - "in duration"
    {frame(F, duration, [T], [])}.
% No duration present: consumes nothing.
sem_duration([], S, S).

% sem_durpreps: words that introduce a duration.
sem_durpreps --> [times].
sem_durpreps -->
    prepterm(for, _).
sem_durpreps --> [lasting, for].
sem_durpreps --> [lasting].
sem_durpreps --> [lasted, for].
sem_durpreps --> [lasted].

% sem_durationmod: optional "'s duration" / "in duration".
sem_durationmod -->
    sem_aposts,            % opt. - "'s"
    [duration].
sem_durationmod --> [in], [duration].
sem_durationmod --> [].

% sem_aposts: optional apostrophe + s.
% NOTE(review): the second head reads "sem_apost" in the scan; taken to be
% sem_aposts -- confirm.
sem_aposts --> [''''], [s].
sem_aposts --> [].

% sem_f requency talcen From MedLEE 's grammar 

% "two times", "times two", "two times a/per week", "two times daily" 
% sem_frequency(F): frequency expressions; F is a frequency frame.
sem_frequency(F) -->
    sem_freqterm(F1),      % "once"
    sem_freqterm(F2),      % "a day"
    {frame(M, unitval, [F1, F2], []),
     frame(F, frequency, [M], [])}.

sem_frequency(F) -->
    sem_freqterm(M),       % "qid", "daily"
    {frame(F, frequency, M, [])}.

% "2 times"
sem_frequency(F) -->
    sem_premeasure,
    sem_quantityterm(M),
    sem_times,
    {frame(F, frequency, [M], [])}.

% "times 2"
sem_frequency(Q) -->
    sem_times,
    sem_quantityterm(Q1),
    {frame(Q, frequency, Q1, [])}.

% "q" followed by a quantity and a time unit
sem_frequency(F) -->
    [q], sem_quantityterm(Q),
    sem_timeunit(T),
    {frame(F, frequency, [unitval, [Q, T]], [])}.
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sem^f requency (F) --> sem_eachevery, 

sem^quantityterm (Q) , 
sem_cimeunit (T) , 

{ frame (F, frequency, {unitval, [Q, T, every) ] , [))). 
sem_frequency(Q) -->       % "second"
    sem_ordinal(O),
    sem_timeopt,
    {frame(Q, frequency, O, [])}.
% No frequency present: consumes nothing.
sem_frequency([], S, S).

% sem_timeopt: optional word "time".
sem_timeopt --> [time].
sem_timeopt --> [].

% sem_eachevery: "each" or "every".
sem_eachevery --> [each].
sem_eachevery --> [every].

% sem_times: "times" or "x".
sem_times --> [times].
sem_times --> [x].



% Taken from MedLEE's grammar 

% negation modifier - "no" as in "no cardiomegaly"
sem_negation(F) -->
    sem_negterm(N),
    {frame(F, neg, N, [])}.
% negation not present
sem_negation([], S0, S0).

% TaJcen from MedLEE's grammar 
% quantity modifier - "two" as in "two masses" 
% sem_quantity(F): quantity modifier; F is a quantity frame, [] when absent.
% NOTE(review): the operator before next_wordunit and the arguments of the
% final frame/4 call are illegible in the scan; "\+" and "Q, []" are assumed
% from the adjacent comment ("rule out '2 mm'") -- confirm.
sem_quantity(F) -->
    snoop(S0, S0),
    {\+ checkst(sem_dates, 1, s, _, S0, _)},   % not a legitimate date
    sem_quantityterm(Q),
    sem_quantityrmod(_),                       % "2 or 3", "2 to 3"
    {\+ next_wordunit(S0),                     % rule out '2 mm'
     frame(F, quantity, Q, [])
    }.

sem_quantity([], S0, S0).



% Terminal rules.  sem_commapunc and sem_endmark are written directly on the
% difference list; the rest look up one lexical category through acclex//2.
% NOTE(review): heads and bodies were printed in separate columns in the
% scanned listing; they are paired here in order of appearance.
sem_commapunc([','|S], S).      % optional comma
sem_commapunc(S, S).
sem_conjterm(C)      --> acclex(conj, C).
sem_doterm(D)        --> acclex(vdo, D).
sem_endmark(['.'|S], S).
sem_endmark([';'|S], S).
sem_freqterm(F)      --> acclex(freq, F).
sem_haveterm(H)      --> acclex(vhave, H).
integerterm(I)       --> acclex(integer, I).
sem_measureterm(M)   --> acclex(unit, M).
sem_medterm(M)       --> acclex(med, M).
sem_negterm(N)       --> acclex(neg, N).
prepterm(P, C)       --> acclex(p, [P, C]).
sem_timeunitterm(T)  --> acclex(timeunit, T).
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% lexog - adapted from MedLEE lexicon 

%%%%%%%%%%%%%%%%%%% CLOSED WORD CATEGORY LEXICON %*%%%%%%%%%%%%%%%%%%%%%% 
%%%%%%%%%%%%%%%%%%%%% NEGATIONS %%%%%*%%%%%*%%%%%%%%%%%%%%%%%%%%%%%%%*% 

:- unknown(_, fail).

:- multifile(wdef/3).
% wdef(Word, Category, Target): negation words, all mapped to target "no".
wdef(cannot, neg, no).
wdef(neither, neg, no).
wdef(never, neg, no).
wdef(no, neg, no).
wdef(non, neg, no).
wdef(none, neg, no).
wdef(not, neg, no).
wdef(nothing, neg, no).

%%%%%%%%%%%%%%%%%%%%% CONJUNCTIONS %%%%*%%%%%%%*%%%%%%%%*%%%%%%%%%%%%%%%*% 

% Conjunctions and grammar words.
wdef('&', conj, and).
wdef('/', conj, or).
wdef('-', grammar, '-').
wdef('+', conj, and).
wdef(although, conj, and).
wdef(and, conj, and).
wdef(as, conj, and).
wdef(because, conj, and).
wdef(but, conj, and).
wdef(',', conj, ',').
wdef(except, conj, no).
%wdef(if, grammar, if).
wdef(minus, conj, no).
wdef(nor, conj, no).
wdef(or, conj, or).
wdef(that, grammar, that).
wdef(though, conj, and).
wdef(thru, conj, and).
wdef(verses, conj, or).
wdef(versus, conj, or).
wdef(vs, conj, or).
wdef(when, grammar, when).
wdef(where, grammar, where).
wdef(whereas, conj, and).
wdef(which, grammar, which).
wdef(while, conj, and).
wdef(who, grammar, who).
wdef(yet, conj, and).

%%%%%%%%%%%%%%%*%%%%% PREPOSITIONS %%%%%%%%%%%%%%%%%%%%%*%%%%%%% 

% Prepositions.  Category p carries [Target, conn/nconn]; ploc, tprep and
% region entries carry a plain target.
wdef(above, ploc, above).
wdef(about, p, [approximately, nconn]).
wdef(about, ploc, about).
wdef(across, ploc, across).
wdef(abutting, ploc, near).
wdef(accompanies, p, [with, conn]).
wdef(accompanying, p, [with, conn]).
wdef(adjacent, ploc, adjacent).
wdef(adjacent, region, adjacent).
wdef(after, p, [after, conn]).
wdef(after, tprep, after).
wdef(along, p, [on, nconn]).
wdef(approximately, p, [approximately, nconn]).
wdef(around, p, [approximately, nconn]).
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wdef (at,p, Jat.nconn] ) . 

% Prepositions (continued).
wdef(atop, p, [on, nconn]).
wdef(before, ploc, before).
wdef(before, tprep, before).
wdef(behind, ploc, behind).
wdef(below, ploc, below).
wdef(between, ploc, between).
wdef(beyond, ploc, beyond).
wdef(by, ploc, near).
wdef(despite, p, [with, conn]).
wdef(during, p, [during, conn]).
wdef(during, tprep, during).
wdef(encasing, ploc, encasing).
wdef(extending, p, [in, nconn]).
wdef(following, p, [after, conn]).
wdef(following, tprep, after).
wdef(for, p, [for, nconn]).
wdef(from, p, [from, conn]).
wdef(in, p, [in, nconn]).
wdef(including, p, [with, conn]).
wdef(into, p, [in, nconn]).
wdef(involving, p, [of, nconn]).
wdef(next, tprep, next).
wdef(occupying, p, [in, nconn]).
wdef(on, p, [on, nconn]).
wdef(of, p, [of, nconn]).
wdef(over, ploc, over).
wdef(overlie, ploc, over).
wdef(overlied, ploc, over).
wdef(overlies, ploc, over).
wdef(overlying, ploc, over).
wdef(prior, tprep, before).
wdef(near, ploc, near).
wdef(radiating, ploc, radiating).
wdef(regarding, p, [about, nconn]).
wdef(roughly, grammar, roughly).   % 'roughly 6 mm'
wdef(since, p, [since, conn]).
wdef(since, status, subsequent).
wdef(through, p, [in, nconn]).
wdef(throughout, p, [in, nconn]).
wdef(to, p, [to, nconn]).
wdef(toward, p, [to, nconn]).
wdef(towards, p, [during, conn]).
wdef(under, ploc, below).
wdef(underneath, ploc, below).
wdef(until, tprep, until).
wdef(up, grammar, up).
wdef(upon, p, [on, nconn]).
wdef(via, p, [with, conn]).
wdef(with, p, [with, conn]).
wdef(within, p, [in, conn]).
wdef(without, p, [no, conn]).
%wdef(without, neg, no).

%wdef (without , neg, no) . 

%%%%%%%%%%%%%%%%%%%%%%%%%% UNITS OF MEASURE %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
wdef ( ' % ' , unit , percent) . 
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wdef <cc,unit , cc) , 

% Units of measure.  Singular and plural spellings share one canonical
% target (the listing had "kilograms" -> a plural target and "percent" ->
% "per cent"; both are aligned with their synonyms).
wdef(centimeter, unit, cm).
wdef(centimeters, unit, cm).
wdef(cm, unit, cm).
wdef(degrees, unit, degree).
wdef(gm, unit, gram).
wdef(gms, unit, gram).
wdef(gram, unit, gram).
wdef(grams, unit, gram).
wdef(kg, unit, kilogram).
wdef(kilo, unit, kilogram).
wdef(kilogram, unit, kilogram).
wdef(kilograms, unit, kilogram).
wdef(liter, unit, liter).
wdef(liters, unit, liter).
wdef(microgram, unit, microgram).
wdef(micrograms, unit, microgram).
wdef(milliliter, unit, ml).
wdef(milliliters, unit, ml).
wdef(milligram, unit, mg).
wdef(milligrams, unit, mg).
wdef(milliseconds, unit, millisecond).
wdef(millivolts, unit, millivolt).
wdef(ml, unit, ml).
wdef(millimeter, unit, mm).
wdef(millimeters, unit, mm).
wdef(mm, unit, mm).
wdef(ozs, unit, ounce).
wdef(percent, unit, percent).

%%%%%%%%%%%%%%%%%%%%%%%%% NUMBERS %%%%%%%%%%%%%%%%*%%%%%%%%%%%%%*%%%%%%%%%%% 

% Numbers: roman numerals, cardinal words and multiplicative quantities.
% The listing mapped "vi" to both 4 and 6; the entry for 4 is restored as
% the roman numeral "iv".
wdef(half, integer, 'one half').
wdef(semi, quantity, semi).
wdef(ii, integer, 2).
wdef(iii, integer, 3).
wdef(iv, integer, 4).
wdef(v, integer, 5).
wdef(vi, integer, 6).
wdef(vii, integer, 7).
wdef(viii, integer, 8).
wdef(ix, integer, 9).
wdef(xii, integer, 12).
wdef(xiii, integer, 13).
wdef(one, integer, 1).
wdef(two, integer, 2).
wdef(double, quantity, double).
wdef(three, integer, 3).
wdef(four, integer, 4).
wdef(quadruple, quantity, quadruple).
wdef(five, integer, 5).
wdef(six, integer, 6).
wdef(sixty, integer, 60).
wdef(seven, integer, 7).
wdef(eight, integer, 8).
wdef(nine, integer, 9).
wdef(ten, integer, 10).
wdef(eleven, integer, 11).
wdef(twelve, integer, 12).
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wdef (thirteen, integer, 13) . 
% Numbers (continued): cardinals, ordinals (category ointeger) and
% quantities.  The misspelled keys "twelvth" and "ninteenth" are kept as
% printed; the correctly spelled forms are added beside them.
wdef(fourteen, integer, 14).
wdef(fifteen, integer, 15).
wdef(sixteen, integer, 16).
wdef(seventeen, integer, 17).
wdef(eighteen, integer, 18).
wdef(nineteen, integer, 19).
wdef(twenty, integer, 20).
wdef(thirty, integer, 30).
wdef(forty, integer, 40).
wdef(fifty, integer, 50).
wdef(sixty, integer, 60).
wdef(seventy, integer, 70).
wdef(eighty, integer, 80).
wdef(ninety, integer, 90).
wdef(hundred, integer, 100).
wdef(thousand, integer, 1000).
wdef(million, integer, 1000000).
wdef(billion, integer, billion).
wdef(zero, integer, 0).
wdef(first, ointeger, 1).
wdef(second, ointeger, 2).
wdef(third, ointeger, 3).
wdef(fourth, ointeger, 4).
wdef(fifth, ointeger, 5).
wdef(sixth, ointeger, 6).
wdef(seventh, ointeger, 7).
wdef(eighth, ointeger, 8).
wdef(ninth, ointeger, 9).
wdef(tenth, ointeger, 10).
wdef(eleventh, ointeger, 11).
wdef(twelvth, ointeger, 12).
wdef(twelfth, ointeger, 12).
wdef(thirteenth, ointeger, 13).
wdef(fourteenth, ointeger, 14).
wdef(fifteenth, ointeger, 15).
wdef(sixteenth, ointeger, 16).
wdef(seventeenth, ointeger, 17).
wdef(eighteenth, ointeger, 18).
wdef(ninteenth, ointeger, 19).
wdef(nineteenth, ointeger, 19).
wdef(triple, quantity, triple).
wdef(twentieth, ointeger, 20).
wdef(thirtieth, ointeger, 30).
wdef(single, quantity, 1).
wdef(solitary, quantity, 1).

% Punctuation and symbol entries.
% NOTE(review): in the scan the first fact is followed by a stray "*/" with
% no visible opening "/*"; it is kept active here -- confirm whether it was
% meant to be commented out.
wdef(frequency, grammar, frequency).
wdef('.', grammar, '.').
wdef(';', grammar, ';').
wdef('/', grammar, '/').
wdef(':', grammar, ':').
wdef('?', certainty, 'moderate certainty').
wdef('+', certainty, 'high certainty').
wdef('''', grammar, '''').

%%%%%%%%%%%%%%%%%%%%%%%%% FREQUENCIES %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 

% Frequency words.
wdef(once, freq, 1).
wdef(times, grammar, x).
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wdef (twice, freq, 2) . 



26 

SNSOOCID: <WQ_0063687A1J_> 



wo 00/63687 



PCT/USOO/10302 



% lexicon with lex0g containing common English words adapted from lex0 of
% MedLEE
% lex1g from lex1 of MedLEE
% August 23, 1999

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%                            CAROL FRIEDMAN                                 %
%                 QUEENS COLLEGE, COLUMBIA UNIVERSITY                       %
%                                                                           %
%                        Version 3.0   4-01-00                              %
%                        Version 2.0   1-31-96                              %
%                        Version 1.0   1-5-92                               %
%                                                                           %
%                  SEMANTIC LEXICON FOR CLINICAL TEXT                       %
%                                                                           %
% The lexicon consists of several files:                                    %
%   lex0g.pl: single word closed classes                                    %
%   lex1g.pl: single word - general modifier type words:                    %
%                                                                           %
%       wdef(word, category, target).                                       %
%   word - is the name of the word being categorized;                       %
%   category - is the semantic category for the word                        %
%   target - is the canonical/standard form for the word                    %
%       words which are synonyms should be assigned the same                %
%       canonical form.                                                     %
%   multi-word phrases are categorized as follows:                          %
%       phrase(word, category, phrase, target).                             %
%                                                                           %
% Semantic Categories:                                                      %
%                                                                           %
%   certainty - "possible"                                                  %
%       canonical values limited to: moderate - for possible                %
%                                    high - for high possible               %
%                                    low - for low possible                 %
%                                                                           %
%   conj - relational operators "and", "or", which connect one finding      %
%          to another finding                                               %
%   neg - negation "no", "not"                                              %
%   quant - for quantitative information "many"                             %
% Make calls to undefined predicates fail, then load the lexicon files.
:- unknown(_, fail).

:- ensure_loaded([nsphrase, lex0g, lex1g, lexsemact, lexsyn, lexsub]).
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% definitions kept from MedLEE lexicon - lexl.pl 

% Forms of "be" (category vbe), all with target 'high certainty'.
wdef(be, vbe, 'high certainty').
wdef(been, vbe, 'high certainty').
wdef(being, vbe, 'high certainty').
wdef(was, vbe, 'high certainty').
wdef(is, vbe, 'high certainty').
wdef(were, vbe, 'high certainty').

/* 

wdef (became, veer tainty, • high certainty ) . 
wdef (become, veer tainty, 'high certainty') . 
wdef (becomes , vcertainty. ' high certainty ' ) . 
wdef (becoming, veer tainty, 'high certainty') . 

put in action lexicon 
wdef (changed, change , change ) . 
wdef (changes , change , change) . 
wdef (changing, change , change ) . 
wdef (necessarily, certainty, 'high certainty*) . 
wdef (necessary, vrecommend, recommended) . 
wdef (necessitate, vstatus , need) . 
wdef (necessitated, vstatus , need) . 
wdef (necessitating, vstatus , need) . 
wdef (necessitates , vstatus, need) . 
wdef (need, vstatus , need) - 
wdef (needed, vstatus, need) . 
wdef (needing, vstatus, need) . 
wdef (needs , vstatus , need) . 

*/
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% file ml_parser.pl 

:- multifile(phrase/5).
:- multifile(wdef/3).
:- unknown(_, fail).

% Load in program components - library components are part of Prolog
:- ensure_loaded([library(basics), library(not), library(lists),
        library(readin), library(strings), library(ctypes), library(readconst),
        library(date), library(listparts), library(sets),
        radrec, radpardb, useful, util, tagging, lexicon, gengram]).

% NOTE(review): the only visible definition of run/0 is the commented-out
% clause below -- confirm that this directive is still wanted.
:- initialization run.
%run :- on_exception(Error, processrun, stop(Error)).
runtime_entry(start) :- processrun.
runtime_entry(abort) :- halt.

% process report
processrun :- process, halt.

%stop (Error) :- 
% told, 

% write (us€r_error, ' Error : '), write (user_error, Error) , halt. 

% get user supplied parameters and process report 
% process/0: reads the command-line arguments and runs the parser.
% NOTE(review): the operator after "Examtype" is illegible in the scan and
% "=" is assumed.  Examtype is not bound by get_args/6, so as written the
% first branch succeeds without calling process/4 -- confirm against the
% master copy.
process :-
    get_args(Mode, Infile, Outfile, Prb, Undefs, Protocol), !,
    (Examtype = [];        % must have a domain
     process(Infile, Outfile, Prb, Undefs)).

% open Infile (text input) and process
% process(+Infile, +Outfile, +Prb, +Undefs): runs test_genome/3 under
% on_exception/3; a raised Error is reported through app_err0/3.  The output
% files are closed afterwards.
process(Infile, Outfile, Prb, Undefs) :-
    see(Infile), seen, see(Infile),
    on_exception(Error,
                 test_genome(Outfile, Prb, Undefs),
                 app_err0(_, Outfile, Error)),
    closefiles(Outfile, Prb, Undefs).
% Reached when the clause above fails: write a failure message to Outfile.
process(_, Outfile, _, _) :-
    app_err(_, Outfile, 'Program failed').

% app_err0(_, +Output, +Error): report a Prolog exception on Output.
app_err0(_, Output, Error) :-
    tell(Output),
    write(' <error> '),
    write('Prolog Error occurred: '),
    app_err(_, Output, Error).

% app_err1(_, +Output, +Error): report an input error on Output.
app_err1(_, Output, Error) :-
    tell(Output),
    write(' <error> '),
    write('Error in input: '),
    app_err(_, Output, Error).

% app_err(_, +Output, +Error): write Error followed by the closing tag.
app_err(_, Output, Error) :-
    tell(Output),
    write(Error), write(' </error> '), nl.

% closefiles(+Outfile, +Errfile, +Unfile): closes the output stream and,
% unless they are [], the problem-message and undefined-word streams.
closefiles(Outfile, Errfile, Unfile) :-
    tell(Outfile), told,
    (Errfile = []; tell(Errfile), told),
    (Unfile = []; tell(Unfile), told).
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% Argument options - get user defined arguments 

% -p ProbFile (otherwise default is problem messages are not written to file) 
% -i Infile (if input is supplied by file and not standard input 
% -s Section (default is impression) 

% -m ^^ode (default is relax; the three choices are strict, relax, skip) 
% -o Outfile (if output should be file and not standard output) 
% -? Provide list of default arguments 

% -u Undefs (otherwise default is - undefined messages are not written 
% to a file) 

% get_args(-Mode, -Infile, -Outfile, -Prbfile, -Undefs, -Protocol)
% Reads the command-line arguments with unix(args(Args)).  With no arguments
% or a single '?', the usage text is printed; otherwise the arguments are
% decoded by set_args/7.
get_args(Mode, Infile, Outfile, Prbfile, Undefs, Protocol) :-
    unix(args(Args)),
    (Args = [], !, writesyntax;
     Args = ['?'], !, writesyntax;
     Args = [X|Rest], !,
     set_args([X|Rest], Mode, Infile, Outfile, Prbfile, Undefs, Protocol)).

% writesyntax: prints the command-line usage on user_error.
% NOTE(review): brackets and spacing inside the quoted usage strings are
% partly illegible in the scan -- confirm against the master copy.
writesyntax :-
    write(user_error, 'geneparser [-mMode]'),
    nl(user_error),
    write(user_error, ' [-t Outtype] [-p Probfile] [-u Undefs]'),
    nl(user_error),
    write(user_error, ' [-i infile] [-o Outfile]'),
    nl(user_error).



BNSDOCID: <WO_0063687A1J_> 



30 



PCT/USOO/10302 

WO 00/63687 

that are ignored 



, nsphrase.pl - contains words/phrases 
nosem(both, [both]).

. . ^ r ^ VtA I I _ 



nosernvs^t" 

nosem(the, [the]).
nosem(a, [a]).
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% file radpardb.pl 

% June 25, 1999 

% fail an unknown predicate
:- unknown(_, fail).
:- op(900, fy, [not, once]).    % same priority and type as \+
% NOTE(review): the two operator symbols are illegible in the scan; \= and
% ~= are assumed -- confirm.
:- op(700, xfx, [\=, ~=]).      % same priority and type as = or ==

:- dynamic(sentno/1).

% \sem\radpardb . pi 

%parse_sentences(+Beg, -Fmt, -ParseErrors, -Undefineds, -Unsents, +Section,
%                +UserMode, +Examtype, Sentno, Outsno, IncSno)
% Beg is list of sentences, Fmt is list of target forms,
% ParseErrors are a list of sentences which could not parse,
% Undefineds is a list of undefined words in sentence
% Unsents is a list of sentences containing undefined words
% Section is the section of the examination, UserMode is the
% parsing mode specified by user,
% Examtype is the domain (type of exam)
% Sentno is the number of the starting sentence
% Outsno is the last sentence number + 1
% IncSno is the amount that the sentence number should be increased
% (i.e. it is 1 when called by parse_sects and 0 when in
% recovery mode)

% Each sentence is parsed independently. 



% NOTE(review): most of this head is illegible in the scan; the arity (11)
% is taken from the recursive clause that follows -- confirm.
parse_sentences([], [], [], [], [], _, _, _, _, _, _) :- !.   % no more sentences

parse_sentences (Beg, Fmtlist , Out fail , Outundef s,OutunSents, 
Sec t ion , UserMode , Examtype , _ , _ , IncSno ) : - 
get_sentence (Beg, S, Rest) , !, 

( isidentif ier (S) , !, % ignore identifier sentences - parse remainder 
parse_sentences (Rest , Fmti , Outf ail , Outundef s , OutunSents , 
Section, UserMode, Examtype,_,_, IncSno) , ! , 
(outputform(htext) , S \a [*.']» IncSno \« 0, %0 means in recovery 

mode 

append ( E [ [sentence. Si 3 ] , Fmt 1, Fmtlist) ; 
Fmtlist = Fmtl 

) 

% ( IncSno s 0 , ■ ; % on same sentence in recovery mode 

% sentno (Sno) , MewSentno is Sno ^ IncSno, 

% retract (sentno (_) ) , assert (sentno (MewSentno) ) 

%%), 

% Incsno = 1, write{ •**♦') , write^list (S, 3,_) , nl, !. 
% Incsno s 0, 

preprocess (S,Bs,Undef, Semlist, strict) , % bracket and check for xindefineds 

parse_modes (S , Bs, Semlist , Fmtl , Errors , Undef , Unsents , Section , Writef ail , 
Examtype, UserMode, IncSno) , % parse first sentence 

parse_sentences (Rest, Fmt2,Moreerrors,Moreundef s,MoreUnSents, 

Sec t ion, UserMode, Examtype,_,_, IncSno) , % parse remaining 
append (Errors, Moreerrors, Outf ail ) , % Combine failures 

{outputform(htext) , 

(Fmtl \« CI, IncSno \= 0, 
!, append ( [Fmtl] ,Fmt2, Fmtlist) ; % add extra bracket for 1st 
Fmt2 « I] , Fmtlist = Fmtl , ! 
) 
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append (Fmtl, Fmt 2, Fmt list) 
) , % Combine targets 

append (Unsents,MoreUnS€nts, OutunSents) , % Combine sentences 
append{Undef ,Moreundefs,Outundefs) % Combine undefined words 

) . 

%parse_modes(+S, +Bs, +Semlist, -Fmt, -Failures, +Undef, -Unsents, +Section,
%            +WriteMessage, +Examtype, +Mode, +IncSno)

% S is original sentence; Bs is sentence after lexical lookup 

% Semlist is list of semantic categories in sentence 

% Fmt is formatted output, 

% Failures is list of sentences/ fragments which could not be parsed. 

% Undef are words not in lexicon, Unsents are sentences containing 

% undefined words 

% Section is name of section being processed 

% WriteMessage is message returned from doresult (in case doresult fails) 

% Examtype is domain, Mode is user specified mode 

% IncSno is 0 if this is a fragment of a sentence that was already 

% parsed - but unsuccessfully; is 1 if this is a new sentence 

% Best possible - try to get the most accurate parse possible, trying
% all alternative strategies in turn if necessary.
% All words in the sentence are defined (Undef = []).
% NOTE(review): restored from a damaged scan.  Three points are inferred:
%   - the 7th head argument printed as `U` is read as `[]`;
%   - dosent is called with 10 arguments to match the dosent/10 definition
%     (the scan shows 9; an anonymous `_` appears to have been dropped);
%   - the htext branch concatenates [[sentence,S]], Fmt1 and Fmt2 via append/2.
parse_modes(S, Bs, Semlist, Fmt, Errors, [], [], Section, no, Examtype, Pmode,
            Inc) :-
    (   Pmode = bpseg,  Pmodemod = mode2, !      % in recovery mode
    ;   Pmode = bpseg2, Pmodemod = mode2, !
    ;   Pmode = bpseg3, Pmodemod = mode2, !
    ;   Pmode = bpskip, Pmodemod = mode4, !      % in recovery mode
    ;   % in user specified parse mode - don't parse in mode 5 or keyword
        Pmode \= keyword, Pmode \= mode5,
        Pmodemod = mode1
    ),
    dosent(S, Bs, Semlist, Fmt1, Message, Section, _, Examtype, Pmodemod, _), !,
                                                 % strict first
    recovery(_, S, Bs, Semlist, Fmt2, Message, Errors, [], [], Section,
             Pmode, Examtype, _),                % try alternative modes if necessary
    (   outputform(htext), Inc \= 0, !,
        append([[[sentence, S]], Fmt1, Fmt2], Fmt)
    ;   append(Fmt1, Fmt2, Fmt)
    ).

% alternative strategies if have undefined words
% NOTE(review): the first argument of the recovery call is missing in the
% scan; `_` is used, as in the preceding clause.
parse_modes(S, Bs, Semlist, Fmt, Errors, Undef, Unsents, Section, no, Examtype,
            Pmode, Inc) :-
    Undef \= [],
    recovery(_, S, Bs, Semlist, Fmt1, yes, Errors, Undef, Unsents, Section,
             Pmode, Examtype, _),      % try alternatives if have undefineds
    (   outputform(htext), Inc \= 0, !,
        append([[sentence, S]], Fmt1, Fmt)
    ;   Fmt = Fmt1
    ).

% key word strategy is fastest but least reliable; 

parse_modes (S , Bs , Semlist , Pmt , Errors , Undef , Unsents , Section, no , Examtype , 
Pmode, Inc) :- 
(Pmode » keyword; Pmode = modes 
; Pmode s modes) , 

recovery (5 , S , S , Semlist , Fmtl , yes , Errors , Undef , Unsents , Section, Pmode , 
Examtype, _), 

(outputf orm (htext) , Inc \« 0 , ! , append ( t [sentence, S) ] , Fmtl , Pmt) ; 
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Fmtl a Fmt 
) . 

% Parsing/Recovery modes 

% recovery(+Level, +S, +Bs, +Sem, -Fmt, +Failed, +Undef, +Unsents, +Section,

% +Pmode, +Examtype, _)

% Level is the recovery level of the predicate 

% S is the original sentence list 

% Bs is the 

% Sem is the list of semantic categories in the sentence 
% Fmt is the formatted output for the sentence 

% Failed is 'yes' if the parse was unsuccessful, and 'no' otherwise 

% Undef is a list of words in sentence which are undefined (not in lexicon) 

% Unsents are the lists of sentences/segments which could not be parsed. 

% Section is the section of the report 

% Pmode is the user specified parse mode 

% Examtype is the domain 

% mode 1 is the strictest parsing mode - the parser succeeded for the complete 
% original sentence using the grammar; all words in original sentence

% are defined in lexicon 

% mode 1 - alternative not needed because parse succeeded 

recovery [), no, [], Undef , Unsents, !. 

% - no alternative strategy allowed in mode 1 

* in case where there are no undef ineds, Noparse is S 

reccvery (1,S,_,_, [] ,yes,S, [] , 1] Pmode, :- 

Pmode s strict; Pmode = model, ! . 
% in case there are undef ineds, Unsents is S 



recovery (1,S,_,_, [), yes, Noparse, Undef , Unsents, Pmode, :- 

(Pmode = strict; Pmode « 'model'), 

Undef \= (] , Unsents « S, Noparse = CI ^ 
recovery (l,S,_,Setnlist, tl #yes, S, _,_,_) : - 
\ sentence contains no relev. information, don't try to recover 
% \+ (subtype (finding, Semi ist) ; subtype (time,Semlist) ) , (, 

\+ actionchk (Semlist) . % april 23, restored 

% mode 4 - skip undefined words and try to parse according to mode 1
% NOTE(review): restored from a damaged scan.  preprocess is called with 5
% arguments and dosent with 10 to match their definitions; the scan shows one
% argument fewer in each call (an anonymous `_` appears to have been dropped).
% `mode4` occurs twice in the mode test in the scan and is kept as printed.
recovery(4, S, _, _, Fmt, yes, Errors, Undef, [], Sect, Pmode, Examtype, _) :-
    Undef \= [],
    (   Pmode = bp ; Pmode = mode4
    ;   Pmode = bpseg ; Pmode = bpskip ; Pmode = mode4
    ),
    preprocess(S, Bs, _, Semlist, bpskip),
    dosent(S, Bs, Semlist, Fmt1, Message, Sect, _, Examtype, mode4, _), !,
    recovery(_, Bs, Bs, Semlist, Fmt2, Message, Errors, [], [], Sect,
             bpskip, Examtype, Sentno),      % try alternatives if necessary
    append(Fmt1, Fmt2, Fmt).

% mode 3 - try longest parsed segment; partition rest of 
% sentence using mode 5 for parse mode bp 

recovery ( 3 , S , Bs., Fmt , yes , Errors , Undef , Unsents , Sect , Pmode, Examtype , _) : - 
% allowable modes for choosing longest segment 
(Pmode = bp; Pmode » bpskip; 

Pmode s skip; Pmode ^ mode 3; Pmode s mode4; 

Pmode = bpseg3 ; Pmode = bpseg 
) , 

(Pmode ss bpskip, Pmodemod » mode4__3; 
Pmodemod « mode3 
) , 

checkst (semjpattem,_,s, Target, Bs, Rest) , %check symbol table 
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%dooresulc: (Target , Fmtl , Examtype , Sect , Pmodemod, _) , 
fortnatresult (Target , Pmodemod, Fmtl) , 
(Ptnode - mode3, Fmtlist - [] , Errors * Rest; 

recovery ( 5 , Rest , Rest , Fmt 1 ist , yes , Errors , Undef , Unsents , Sect , 
Pmode , ixamtype , _) 

), 

append (Fmtl, Fmtlist, Fmt) . 
% mode 2 segments sentence using word barrier methods.  This mode is tried if
% parse failed for original sentence or there are undefined words.

% segment sentence using word barriers
% (`mode2` is listed twice in the scan; the duplicate is kept as printed.)
recovery(2, S, _, _, Fmt, yes, Errors, Undef, Unsents, Sect, Pmode, Examtype, _) :-
    (   Pmode = bp ; Pmode = bpskip ; Pmode = mode2 ; Pmode = skip
    ;   Pmode = mode2 ; Pmode = mode3 ; Pmode = mode4
    ;   Pmode = bpseg ; Pmode = bpseg2
    ;   Pmode = bpseg3
    ),
    segmentandparse(S, Fmt, Errors, Unsents, Sect, Pmode, Examtype, _), !.
% mode 5 - try to partition sentences by findings 
% when a finding in sentence is found, go left until first 
% modifier is found (if 2 findings are next to each other, 2nd one 
* is considered the finding and 1st is considered the modifier) 
% Repeat searching for successive findings using this method 
recovery(5, [j , C3 IJ C3 - 
recovery ( 5 , S , Bs , _ , Fmt , yes , Errors , Undef , Utiserits , Sect , 
Pmode , Examtype , _) : - 

(Pmode = bp; Pmode = bpskip; Pmode = bpseg; Pmode « keymode; 
Pmode = modes ; Pmode « negmode 

) . 

preprocess (S , Bsl , bpskip) , % skip undefined words 
actionfindingsegCBsl, Fseg, Before) .! , % get segment containing finding 
(Fseg = [} , Errors « S, ! ; % no finding to segment 

%Before » [] , Errors = Bs, Fmtl * C3 , J ; % this part was tried 

preprocess (Fseg,Bseg,_, Semi is t, bpskip) , 

dosent ( Fseg , Bseg , Semlis t , Fmtl , Message , Sect , Examtype , 
modes, ) % try to parse finding segment 

). " 

(Before « [J, Beforel « [] , Message « yes, t; * no segmenting yet - 

skip beg. 

Message « yes, Beforel » Before, i; %don't add have to skip 

more 

append (Before, [* . '] , Beforel) 
), 

( Fseg - [] , Fmt =[],!; % no finding left in sent. - don't recover 
recoverrest (Fseg,_, Beforel , Fmt 2 , Message, Errors , 
Sect , Newmode , Examtype , _) , 

% recover remainder 

append ( Fmtl , Fmt2 , Fmt ) 
) - 

% nothing could be recovered; all input -> Errors ; Format is C3 
recovery Sents,_,_^, [] , yes, Sent s , Undef, [] • 

% part of phrase was skipped, add period and treated skipped part as a 
% sentence 

% recoverrest(+Segment, +Semlist, +Before, -Fmt, +Message, -Failures, +Section,
% +Mode, +Examtype, _)

% Segment is part of sentence with a finding 
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% Semlist is a list of semantic categories for that sentence part 

% Before is the part of sentence before Segment 

% Fmt is the format for this segment 

% Message is 'no' if there is no semantic information to be recovered

% Message is 'yes' otherwise

% Failures are lists of segment (s) that could not be parsed successfully 

% Section is section being processed. Mode is user specified parsing mode 

% Examtype is domain 

recoverrest (_,_, Before, (] .no, Beforel._, 

(Before = (] , Beforel « C) , i ; % nothing was skipped 
append (Before. [* .*] .Beforel) 

). J. 

% nothing left to recover; write phrase that was skipped 
recoverrest ( [1 Before, [J , yes, Beforel _) :- 

(Before « (] , Beforel « CI * ! ; " 

append (Before. £' . ' ) .Beforel) ^ 

) , ! . 

% can recover partial parse 

recoverrest (Bs, Before, Fmt, yes, Errors, Sect, Pmode , Examtype . _) : - 

checks t (sem_pattern,_,s, Target, Bs, Res tseg) . % recover from symbol tab. 
%doresult (Target , Fmtl , Examtype , Sect , modeS , _) , 

formatresult (Target .modes, Fmt 1) , 
recovery ( 5 , Restseg, Rest , Fmt2 , yes , Error2 , 

[],[}, Sect, Pmode , Examtype , _) . 
append (Fmtl, Fmt 2 , Fmt) , 

(Before - (1/ Errors = Error2, I; %nothing skipped to add to 
append (Before, t • . • |Error23 .Errors) 
) . 

% cannot recover partial pdrse - skip first element and retry 

* if 1st element is a negation semantic type, skip 2nd element instead 
% Handles case where 1st element is. a negation. certainty or status 

% add 2nd element to unparsed sentences list (enlcosed in angle brackets) . 

recoverrest ( ex. Y I Restseg 3 Beforel, Fmt .yes. Errors, 
Sect , Pmode , Examtype , _) : - 
f oundword (X. Semi , Tar) , 

{ member(Seml. [neg, certainty, vcertainty.vconn, status, vstatus] ) ; 
Semi s p, Tar = C_,connl 

) , 

%(Mod = neg; Mod = certainty; Mod = status; Mod =« vcertainty) , % leave 
this mod in 

preprocess ( (X I Restseg] , FsegO,_,_,bpskip) , % skip tmdefined words 
f indingseg (FsegO. Fseg, Before2) , % get finding seg 

(Fseg = [] , Errors » [X.YjRestseg] , Fmt = [] ; % no finding 
preprocess (Fseg, Bseg,_, Rest sem, bpskip) , % skip undefined words 
dosent ( Fseg , Bseg , Restsem, Fmtl , Message , Sect , _ , Examtype , 

modes, _) , % try to parse finding segment 
recoverrest (Fseg, [y|Before2] , Fmt 2, Message, Error2, 

Sect .negmode, Examtype, _) , % recover remainder 
(Beforel - C] , Errors = Error2, i; 
append (Beforel, [. |Error2) .Errors) 
) , 

append ( Fmt 1 , Fmt 2 , Fmt ) 
). 

* skip 1st element; enclose it in brackets 
recoverrest { [X I Restseg] ,_.Beforel, Fmt, yes, Errors, 

Sect , Pmode , Examtype / _ ) : - 
preprocess (Restseg, FsegO, bpskip) , 
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findingseg(FsegO,Fseg,Before2) , !, % get finding seg 
append (Bef Orel, (x|Before2) .Before) , 

(Fseg - (1, Errors « [X|Restseg], Pmt « {] ; % no finding 
preprocess (Pseg,Bseg,_,Restsem,bpskip) , 
dosent (Fseg , Bseg , Restsem, Fmti , Message , Sect , Examtype, 

modes, % try to parse finding segment 
recoverrest ( Fseg, Before, Pmt 2, Message, Errors, 

Sect , Newmode , Examtype , Ji , % recover remainder 
append < Fmt l , Fmt2 , Fmt ) ~ 
) . 



% no semantic information left; return Errors 
recoverrest ( (Xj Restsegl , [) , Bef orel, Fmt , yes, [XjRestseg], 
Sect , Pmode , Examtype , - 

%dosent(+S, +Bs, +Semlist, -Fmtlist, +Message, +Section, +WriteMessage, +Examtype,
% +Mode)

% S is original list of words in sentence; Bs is list after lexical lookup 

% Semlist is list of semantic categories corresponding to Bs 

% Fmtlist is list of target forms for sentence 

% Message is 'yes' if the output from parser signals a failure, 

% and 'no* otherwise 

% Section is section of examination being processed 

% WriteMessage signals whether an error occurred in generating target form 

% Examtype is the domain, and Mode is the user specified mode of parsing 

% Parse sentence and returns target in nested format 

% Handles case where sentence should be skipped because info is about 

% family member or peripheral to patient 

% Skip the sentence altogether when skipsentence/3 accepts it.
% NOTE(review): the tail of this head is lost in the scan; the trailing
% arguments are filled with `_` to reach the arity of the clause below.
dosent(S, _, Semlist, [], Error, _, _, _, _, _) :-
    skipsentence(S, Semlist, Error), !.
% Parse the sentence and format the parser output.
%   P = [failure] : parse failure      -> Errormsg = yes, Writefail = no
%   P = []        : empty target       -> Errormsg = no,  Fmtlist = []
%   formatresult succeeds              -> Errormsg = no,  Writefail = no
%   otherwise                          -> Errormsg = yes, Writefail = yes
dosent(S, Bs, Semlist, Fmtlist, Errormsg, Section, Writefail, Examtype, Mode, _) :-
    attemptparse(P, Bs, sentence, Semlist, Section, Atotal),
    (   P = [failure], Errormsg = yes, Writefail = no, !        % parse failure
    ;   P = [], Errormsg = no, Writefail = no, Fmtlist = [], !  % empty target
    ;   %doresult(P, Fmtlist, Examtype, Section, Mode, _),
        formatresult(P, Mode, Fmtlist),
        Errormsg = no, Writefail = no, !
    ;   Errormsg = yes, Writefail = yes, !
    ).

%parse_sentences (Beg^Beg, t) , t] ! . 

% attemptparse(-P, +Bs, +Structure, +Semlist, -Ptype, -Total)
% P is output from parser 

% Bs is list of words in sentence after lexical lookup 

% Structure is name of structure to be parsed 

% Semlist is list of semantic categories corresponding to elements in Bs 

% Total is number of times parser reached sem_sent in gramimar; 

% where sem_sent is highest level predicate in grammar 

% don't parse if sentence consists of only '.' or ';' 
attemptparse ((] ,Bs, ) 
Bs a ['.•]; Bs = t';'3T 

% if a template exists for whole sentence, get parse from it 



37 



BNSDOCtD <WO_00e3667A1J_> 



wo 00/63687 



PCT/USOO/10302 



attemptparse(P,Bs, sentence, : - 

Bs s tx,'.*], is_list(X), % the whole sentence is a finding 
f ind_sem_sent ( P , X ) , ! . 

% parses and retracts well formed string table - parses sentence 
attemptparse (P, Bs, sentence , Semi is t« Ftype , Atotal } : - 

retractalKwfst ) , 

ret rac tall (adds total (_T) , 

sem_sent (P, Semi is t, Atotal, Bs, C) > / i . 

* parses and retracts well formed string table - parses bodypart only 
attemptparse (P,Bs,bodypart, :- 

sem_bodyloc (P, Bs, C] ) , 

retractall (wfst (_,_, ,_)), !. 



% segmentandparse(+Sentences, -Fmtlist, -Failures, -Unsent, +Section, +Mode,

% +Examtype, +Sentno)

% Sentences is list of sentence segments. 

% Fmtlist consists of the formatted output for the segments 

% Failures is the list of unparsed segments. 

% Unsent is the list of segments with undefined words . 

% Section is the section being processed. Mode is the user specified mode 

% Examtype is the domain and Sentno is the sentence id. 

segmentandparse([], [], [], [], _, _, _, _) :- !.

% Take the next sentence, try the three segmenters in turn, parse the
% resulting segments, then continue with the remaining sentences.
% NOTE(review): restored from a damaged scan.  parse_sentences is called with
% 11 arguments to match its definition; the scan shows 10, and the dropped
% argument is taken to be the (ignored) list of undefined words.
% The third segmenter is printed as `segments` everywhere in the scan and is
% kept under that name; it may originally have been `segment3`.
segmentandparse(Sentences, Fmtlist, Failures, UnSent, Section, Mode,
                Examtype, Sentno) :-
    get_sentence(Sentences, S, Rest), !,          % sentence to segment
    preprocess(S, S1, _, Semlist, Mode), !,
    (   Mode = mode2, NewPmode = bpseg2, !
    ;   Mode = mode3, NewPmode = bpseg3, !
    ;   NewPmode = bpseg
    ),
    (   segment1(S1, Segs, [], seg), !,
        parse_sentences(Segs, Fmt1, Fails, _, Un1, Section, NewPmode, Examtype,
                        Sentno, Sentno, 0), !
    ;   segment2(S1, Segs, [], seg), !,
        parse_sentences(Segs, Fmt1, Fails, _, Un1, Section, NewPmode, Examtype,
                        Sentno, Sentno, 0), !
    ;   segments(S1, Segs, [], Negstatus, seg), !,
        parse_sentences(Segs, Fmt1, Fails, _, Un1, Section, NewPmode, Examtype,
                        Sentno, Sentno, 0), !
    ),
    % fails if cannot segment sentence; otherwise segments remainder
    segmentandparse(Rest, Fmt2, Nexterrors, NextUns, Section, Mode,
                    Examtype, Sentno),
    append(Fmt1, Fmt2, Fmtlist),
    append(Un1, NextUns, UnSent),
    append(Fails, Nexterrors, Failures), !.

% segment1(+S, -Segs, +Beg, +Message)
%   S       is list of words in sentence
%   Segs    consists of sentence segments as separate sentences
%   Beg     is list of words in sentence prior to the current portion of sentence
%   Message is 'seg' if segmenting succeeded and 'noseg' otherwise
segment1([], [], _, noseg) :- !.

% segment sentence at connect phrase /word or at most conjtmctions 
% if negation precedes, restore negation 
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segment! { CX I Rest) ,['.', •<eos>' |Rem] ,Beg,seg) :- 

\+ sem_endtnark(Rest, [] ) , % don't segment if at end already 
foundword(X, Sem, Target) , % get semantic classification and target 
( X - nor, append ( [no] , Rest ^ Rem) % ok to segment at nor 

;X = without, append! [no] , Rest, Rem) % ok to segment at without 

% ;X * ' : ' , Rest = Rem 

; Sem * neg. Rest = [Next | Rest2) , % have negation; test word after 
foundword (Next,Sem2,Target2) , % for connective - add back negation 
testforconn(Next,Sem2,Target2) , Rem = tX|Rest2) 
; testforconn<X, Sem, Target ) , Rest = Rem 
) . 

% No segmentation point at X: copy it to the output and carry on, extending
% the list of words seen so far.
segment1([X|Rest], [X|Newrest], Start, Seg) :-
    append(Start, [X], Beg),              % part before segmentation
    segment1(Rest, Newrest, Beg, Seg).

% testforconn(+X, +Sem, +Target): word X with semantic class Sem and target
% form Target is a point at which a sentence may be segmented.
testforconn(X, Sem, Target) :-
    (   Sem = p, Target = [P, conn], P \= with   % segment at connective prep
    ;   member(Sem, [vconn, vshow])              % segment at these types of verbs
    ;   Sem = conj, \+ member(X, [and, or, ',', '/', as])
    ).

% segment at certain words
segment2([], [], [], noseg) :- !.
segment2(S, Segs, [], seg) :-
    seg2(S, Rest, Segs),
    \+ sem_endmark(Rest, []), !.
segment2([X|Rest], [X|Newrest], [], Seg) :-
    segment2(Rest, Newrest, [], Seg).

% seg2(+S, -Rest, -Segs): S starts with one of the listed words; that word is
% replaced by a sentence break ('.', '<eos>') in front of the remainder.
% NOTE(review): the head's third argument and the two quoted bracket atoms
% are restored from a damaged scan.
seg2([X|Rest], Rest, ['.', '<eos>'|Rem]) :-
    member(X, [which, that, until, where, when, while, who,
               '(', ')', between, whereby, after, before, prior,
               greater, ranging]),
    Rem = Rest, !.

segments ([],[],__,_, noseg) i . 

% segment at conjunction - if negation preceded conjunction, add 
segments ( [X|Rest] ,Rem,Beg,Negstatus,seg) 

\+ sem^endmark (Rest, []),!, % already at end of sentence 
seg3 { [X I Rest] , Rem, Beg, Negstatus , seg) , i . 

seg3 ( [XjRest] ,Rem,Beg,Negstatus,seg) 
wdef (X, conj , 
member (X, [and, or, ',']), 

(nonvar (Negstatus) , Rem = ( • . • ,Negstatus|Rest) , ! %restore negation 
; Rem = <eos> • | Rest 1 , • 

) . 

seg3(EXlRest] , [;c, ' . ' , '<eos>' [Rest] ,_,_,seg) :- 
foundword(X,age) , !. 

seg3 ( [X I Rest] , [X I Newrest] , Start, Negstatus, Seg) 

( nonvar (Negstatus) , !; % 1st neg already found - continue segmenting 
f oundword (X , Sem, Target ) , ! , 

( Target = no, Negstatus = X, it 
Sem = neg, Negstatus = X, i ; 
Sem \« neg. Target \= no, i 

) ; 
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true, < % word is undefined 
) , 

append (Start, [X] , Beg) , % part before segmentation 
segments ( Res t.Newrest, Beg, Negs ta tus, Seg) . !. 

% for finding type classes - parse as a sentence 
wha tt ©parse (Sem,P, Sent) :- 

member (Sem, [cfinding,p£inding«morph, disease, device. procmproc, descriptor] ) , 
at tempt pa rse (P, Sent, sentence, CSem] , impress ion , . 

\ for bodyloc classes - parse as a bodyloc modifier 
what toparse (Sem, p, Sent) 

member(Sem, [bodyloc, region, side, position) ) , 

attemptparse (P , Sent , bodypart 
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% file radrec.pl 
% September 7, 1999 
% fail an unknovm predicate 
:- unknown(_, fail).

:- op(900, fy, [\+, not, once]).    % same priority and type as \+

% NOTE(review): the second operator name is illegible in the scan; `~=` is a guess.
:- op(700, xfx, [\=, ~=]).          % same priority and type as = or ==

:- dynamic(domain/1).               % domain being processed

:- dynamic(outputform/1).           % form of output (needed to distinguish
                                    % markup of text from formatting forms)
% NOTE(review): printed as "current sect /I"; the exact spelling needs confirming.
:- dynamic(currentsect/1).          % section for outputting results

test_genome(Oucfile,Errfil€, Unfile) : - 

get_inputsents { (] , Toklist) , i, % read in and tokenize input 
(Toklist ~ (1 , ! , % error condition 
app_errl (_,Outf ile, 'No input sent'), I 

parse_sentences (Toklist , Fmt list. Failed, Undef,UnSent, impression, 
bp, genome, 0) , ! , 

outputresul t s ( Fmt 1 ist , Fai led , Er r f i 1 e , Undef , Unf i 1 e , UnSent , Out file , 
full, line, genome, 1, 0, ,exe, plain) 

) , 

output resul ts ( Pmt lis to. Failed, Errfile, Undef .Unfile, UnSent, Out file, 

Amount , Type , Exam, Compno , DocComp , NewCompno , Caller, Protocol ) 

tell(Outfile) , 

(Protocol - sgral, ! , Op = sgml; 
Caller = server, ! , Op = sgml; 
Op s plain) , 

(Type = nested, i , % original output form - nested findings 
write ( ' <nested> ' ) , new_line (Op) , 
write (Pmtlist) , new^line (Op) , write (' </nested>' ) , 
new line (Op) , ! 

> , 

(Caller = server, 

write_message (Unf ile, Undef , Caller, * <undef ined> ' , • </undef ined> * ) 

i 

Caller « exe, Undef \a (1, 

write_message(Uhfile,Undef, Caller, ****** Undefined Words *****•,[]) 
%write_highlight ( C] , UnSent, Cal ler) 

true 

) , 

(Caller = server, 
write ( • <noparae> ' ) , ! , 
write_highlight (Undef , UnSent , Caller) , 

write^highlight ( t] i Failed, Caller) , write ( ' </noparse> • ) 

Caller = exe, Errfile \a (1, Failed \* C] , 
tell (Errf ile) , 

writeC***** Sentences/Phrases Not Parsed **♦**•), nl, 
%write_highlight (Undef , UnSent, Caller) , 
write^highlight ( t] , Failed, Caller) 

true % no Errf ile to write to 
> . 

% set^args: Process options 
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% Argument options 

% ' -p ProbFile (otherwise default is problem messages are not written to file) 

% -i Infile (if input is supplied by file and not standard input 

% -m Mode (default is bp; the 6 choices are bp, model - modeS) 

% -o Out file (if output should be file and not standard output) 

% -? Provide list of default arguments 

% -pr Protocol - sgml or plain (default is plain) 

% -u Undefs (otherwise default is - undefined messages are not written 
% to a file) 

% set_args(+Args, -Mode, -Infile, -Outfile, -Prbfile, -Undef, -Protocol)
% Runs each option handler over the same argument list.
set_args(Args, Mode, Infile, Outfile, Prbfile, Undef, Protocol) :-
    set_mode(Args, Mode), set_amount(Args, _Amount),
    set_protocol(Args, Protocol),
    set_infile(Args, Infile), set_outfile(Args, Outfile),
    set_prbfile(Args, Prbfile), set_undefs(Args, Undef).

% set_mode(+Args, -Mode): Mode is the internal name of the mode that follows
% '-m' (or m) in Args; bp when the option is absent.
set_mode(Args, Mode) :-
    ( nextto('-m', M, Args) ; nextto(m, M, Args) ), !,
    modeis(M, Mode), !.
set_mode(_, bp).                 % default output type

% modeis(+UserName, -Mode): maps a user-visible mode name to the internal one.
modeis(relax, mode2)   :- !.
modeis(strict, mode1)  :- !.
modeis(skip, mode4)    :- !.
modeis(longest, mode3) :- !.
modeis(best, bp)       :- !.
modeis(mode1, mode1)   :- !.
modeis(mode2, mode2)   :- !.
modeis(mode3, mode3)   :- !.
modeis(mode4, mode4)   :- !.
modeis(mode5, mode5)   :- !.

% set_protocol(+Args, -Protocol): Protocol is the value following '-pr' (or
% pr) when it is sgml or plain; otherwise plain.
set_protocol(Args, Protocol) :-
    ( nextto('-pr', Protocol, Args) ; nextto(pr, Protocol, Args) ),
    member(Protocol, [sgml, plain]), !.
set_protocol(_, plain).
% set_undefs(+Args, -Undefs): Undefs is the file named after '-u' (or u).
% Both alternatives are now committed with a cut, as in set_prbfile/2; as
% printed the first alternative had none, so backtracking could fall through
% to the default even when '-u' was given.
set_undefs(Args, Undefs) :-
    ( nextto('-u', Undefs, Args) ; nextto(u, Undefs, Args) ), !.  % undef file option
set_undefs(_, []).       % default is no file of undefineds created

% set_infile(+Args, ?Infile): keeps an already bound Infile, otherwise takes
% the value following '-i' (or i); user_input when neither applies.
set_infile(Args, Infile) :-
    (   nonvar(Infile), !                  % Infile is set already
    ;   nextto('-i', Infile, Args), !
    ;   nextto(i, Infile, Args), !
    ).
set_infile(_, user_input).                 % default is standard input

% set_prbfile(+Args, -Prbfile): Prbfile is the file named after '-p' (or p).
set_prbfile(Args, Prbfile) :-
    (   nextto('-p', Prbfile, Args), !
    ;   nextto(p, Prbfile, Args), !        % prob file option
    ).
set_prbfile(_, []).          % default is no file of problems is created

% set_outfile(+Args, ?Outfile): keeps an already bound Outfile, otherwise
% takes the value following '-o' (or o); user_output when neither applies.
set_outfile(Args, Outfile) :-
    (   nonvar(Outfile), !                 % Outfile is already set
    ;   nextto('-o', Outfile, Args), !
    ;   nextto(o, Outfile, Args), !        % outfile option
    ).
set_outfile(_, user_output).               % default is standard output

% new_line(+Caller): line break in the form suited to the output kind.
% NOTE(review): the quoted text is taken to be '<br>' without the padding
% blanks shown in the scan.
new_line(sgml)   :- write('<br>'), nl, !.
new_line(server) :- write('<br>'), nl, !.
new_line(exe)    :- nl.
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new_line (plain) nl . ' 
% write_message(+File, +Contents, +Caller, +Begmsg, +Endmsg)
% Writes Begmsg, the Contents list and (unless it is []) Endmsg.  For callers
% exe and plain the output is first redirected to File with tell/1.
% NOTE(review): the four guard clauses are restored from a badly damaged
% scan; they are read as "nothing to do when Contents or File is []" for the
% exe and plain callers.
write_message(_, [], exe, _, _)   :- !.
write_message([], _, exe, _, _)   :- !.
write_message(_, [], plain, _, _) :- !.
write_message([], _, plain, _, _) :- !.

write_message(File, Contents, Caller, Begmsg, Endmsg) :-
    (   member(Caller, [exe, plain]), tell(File), !
    ;   true
    ),
    write(Begmsg), new_line(Caller),
    (   Contents = []
    ;   write_list(Contents, 1), new_line(Caller)
    ),
    (   Endmsg = [], !
    ;   write(Endmsg), !, new_line(Caller)
    ).

sentend ( IX |J , Caller) : - 

member (X, \ •;♦,•?']> , new_line (Caller) , !. 



% gettargets(+Words, -Targets): replaces each word by the target form given
% by foundword/3.
% NOTE(review): the end of the second clause is lost in the scan; it is
% restored as a plain fact.
gettargets([], []) :- !.

gettargets([ignore|Rest], [ignore|Rest]).      % possibly ignore info.

gettargets([W1|Rest], [T1|Trest]) :-
    foundword(W1, _, T1),                      % target for W1
    gettargets(Rest, Trest), !.
gettargets(W, W).                              % not in lexicon
% isneg(+X): holds when intersect/2 holds between X and the fixed list of
% negation terms.
isneg(X) :-
    Negations = [no, negative, deny, 'rule out'],
    intersect(X, Negations).

writeoutsent ( [Word | Rest} > : - 

write (•*'•), write (Word) , write (•''•), ! , 
(Word - " ' ' , write <'••'). ! ; true) , 
(Rest \« [) , writeCMi J, writeoutsent (Rest) , !; . 
true) , i . 
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% This file contains predicates associated with SGML tags 
% nextTag(+L, ?Tag, -PreTag, -PostTag) is true if
%   L       is the starting list
%   Tag     is an SGML tag; it could be a variable or instantiated already
%   PreTag  is the portion of L preceding Tag
%   PostTag is the portion of L following Tag
nextTag(L, Tag, PreTag, PostTag) :-
    append(PreTag, ['<', Tag, '>'|PostTag], L).

% endTag(+L, +Tag, -Pre, -Post) is true if
%   L    is the starting list
%   Tag  is the SGML end tag
%   Pre  is the portion of L preceding the end of tag
%   Post is the portion of L following the end of tag
% The scan shows the list-of-lists form append([Pre, [...], Post], L); it is
% written here with append/3, like nextTag/4, which yields the same Pre/Post.
endTag(L, Tag, Pre, Post) :-
    append(Pre, ['<', '/', Tag, '>'|Post], L).

% enclosedPart(+L, +Tag, -Enclosed, -Post) is true if
%   L        is the starting list; it is assumed that L is the portion of
%            some list that follows a begin tag - i.e. '<',Tag,'>'|L
%   Tag      is the SGML tag
%   Enclosed is the portion of text enclosed in the tag, not including
%            the end tag
%   Post     is the portion of L following the end tag
enclosedPart(L, Tag, Enclosed, Post) :-
    endTag(L, Tag, Enclosed, Post).
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% file useful.pl - lexical lookup and utility tools 

:- unknown(_, fail).

:- dynamic(sentence/1).

:- op(900, fy, [not, once]).    % same priority and type as \+
% NOTE(review): the second operator name is illegible in the scan; `~=` is a guess.
:- op(700, xfx, [\=, ~=]).      % same priority and type as = or ==

% useful.pl February 21, 1992 

% 

% preprocess(+S, -Bs1, -U, -Sem3, +Mode): preprocesses sentence to
%   bracket lexical phrases and remove words/phrases in the
%   special db of noise words (nosem in nsphrase.pl db)
%   S    is original sentence
%   Bs1  is preprocessed sentence
%   U    is list of undefined words in sentence
%   Sem3 is the semantic-class list returned by the second checklist pass
%   Mode is mode of process - in skip mode undefined words are removed
%        from preprocessed sentence
% NOTE(review): the second argument of the second checklist call is printed
% as `0` in the scan; it is read as `U`, since nothing else binds U.

preprocess(S0, Bs1, U, Sem3, Mode) :-      %cfnew
    checkbeg(S0, S),                       % if beginning is 'A)' ignore
    checkphrase(S, S1, Sem1),              % bracket all phrases in phrasal lexicon first
    checklist(S1, U1, Bs, Sem2, Mode),     % check that all words are in lexicon, remove
                                           % non semantic
    checklist(Bs, U, Bs1, Sem3, Mode).     % check for phrases after non-sem are removed
%append(Sem1, Sem2, Sem1),
%append(Sem1, Sem3, Semlist),

%union(U1, U2, U).

% foundword(+X) checks if word X is defined as a single word, or if X starts
% a defined phrase
foundword(X) :-
    wdef(X, _, _), !.
foundword(X) :-
    semw(X, _, _, _), !.
%definition from tagged input
foundword(X) :-
    phr(X, _, _, _), !.
foundword([X|Rest]) :-
    Rest \= [],
    phrasal(X, _, [X|Rest], _), !.
% 3/99 added foundword to search the new semact.pl lexicon
% phrasal using semp was added to util.lp
% foundword/2 returns semantic cat. of word
foundword(X, Sem) :-
    wdef(X, Sem, _).
foundword(X, Sem) :-
    semw(X, Sem, _, _).
%definition from tagged input
foundword(X, Sem) :-
    phr(X, Sem, [], _).
foundword([X|Rest], Sem) :-
    phrasal(X, Sem, [X|Rest], _).
% foundword/3 returns semantic cat. and target form
foundword(X, Sem, Form) :-
    wdef(X, Sem, Form).
foundword(X, Sem, Form) :-
    semw(X, Sem, Form, _).
%definition from tagged input
% NOTE(review): this head shows a stray fourth argument in the scan; it is
% read as arity 3, as the comment above states.
foundword(X, Sem, Form) :-
    phr(X, Sem, [], Form).
foundword ( CX | Restl , Sem, Form) : - 
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phrasal <X, Sem, (X|Restl , Form) . 

% collectsem(+Word, -Sem): Sem is the list of semantic classes corresponding
% to Word, collected with setof/3 over foundword/2 (so the call fails when
% foundword/2 has no solution for Word).
collectsem(Word, Sem) :-
    setof(X, foundword(Word, X), Sem).
% missing checks if a word present in a sentence is defined 
missing (X) :- 

member (X,S) , 

not fo\indword(X) , 

% checkbeg(+S0, -S) checks beginning of sentence; if it begins with a letter
% or number followed by a ')', that part is skipped
checkbeg([_, ')'|Rest], Rest) :- !.
checkbeg(X, X).

% checklist(+List, -Undef, -Newrest, -Semlist, +Mode)
% checks every word in a list to see if it is defined; creates
% a new list of words not defined, and a new list of sentence
% where phrases are bracketed.
% NOTE(review): this base clause is almost entirely lost in the scan; the
% arity is taken from the clauses that follow.
checklist([], [], [], [], _).

% if X is a list it has already been identified as a phrase in phrasal lex
checklist([X|Rest], Undef, Newrest, Semlist, Mode) :-
    is_list(X),
    check_no_sem([X|Rest], Rest1, _),
    checklist(Rest1, Undef, Newrest, Semlist, Mode), !.   %is phrase part of nosem
checklist([X|Rest], Undef, [X|Newrest], Semlist, Mode) :-
    %collectsem(X, Sem),
    is_list(X), X = [W1|_Tail],
    phrasal(W1, Sem, X, _),
    checklist(Rest, Undef, Newrest, Sem2, Mode), !,
    append([Sem], Sem2, Semlist).
% 'without' is treated as 'with no'
checklist([without|Rest], Undef, Newrest, Semlist, Mode) :-
    checklist([with, no|Rest], Undef, Newrest, Semlist, Mode).
% this problem has to be fixed in preprocessor
% check for a number with a , - "11,200" and fix it
%checklist([X, ',', Y|Rest], Undef, [N|Newrest], [number|Semlist], Mode) :-
% number(X), number(Y), N is X * 1000 + Y, !,
% checklist(Rest, Undef, Newrest, Semlist, Mode), !.
% check for a literal number %cfnew
checklist([X|Rest], Undef, [X|Newrest], [number|Semlist], Mode) :-
    number(X),
    checklist(Rest, Undef, Newrest, Semlist, Mode), !.
% beginning of List is a prefix of a phrase that is a complete finding
checklist(List, Undef, [Phrase|Newrest], [cfinding|Semlist], Mode) :-
    check_sem_finding(List, Rest, Phrase),
    checklist(Rest, Undef, Newrest, Semlist, Mode), !.
% beginning of List is a prefix of a phrase that is in nosemantic lexicon
checklist(List, Undef, Newrest, Semlist, Mode) :-
    check_no_sem(List, Rest, _Phrase),
    checklist(Rest, Undef, Newrest, Semlist, Mode), !.
% beginning of List is a prefix of a phrase that is in phrasal lexicon
checklist(List, Undef, [Phrase|Newrest], Semlist, Mode) :-
    get_longest_sem(List, Rest, Phrase, Sem),
    %check_sem(List, Rest, Phrase, Sem), %change to get longest phrase
    checklist(Rest, Undef, Newrest, Sem2, Mode), !,
    append(Sem, Sem2, Semlist).
% beginning of List is a single word that -is in semantic lexicon 
checklist { [X|Rest} , Undef, [XjNewrest] , Semlist, Mode) : - 
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collectsem(X, Sem) , !, 
%f oundword (X , Sem) , i , 

checklist (Rest, Undef, Me wrest, Sem2, Mode) , I, 
append (Sem, Sem2 , Semi ist) . 
% beginning of List is an undefined word
% X is added to the undefined-word list unless already present.  In skip and
% bpskip mode X is also dropped from the output sentence.
% As printed, Undefs was left unbound when X was already in Undef; it is now
% unified with Undef in that case.
checklist([X|Rest], Undefs, Nrest, Semlist, Mode) :-
    checklist(Rest, Undef, Newrest, Semlist, Mode),
    (   member(X, Undef), Undefs = Undef, !
    ;   Undefs = [X|Undef], !
    ),
    (   Mode = skip, !, Nrest = Newrest
    ;   Mode = bpskip, !, Nrest = Newrest
    ;   Nrest = [X|Newrest]
    ), !.

% checkphrase(+List, -Bracketed, -Semlist)
% if beginning is a number followed by a . followed by a non number
% skip; %cfnew
% NOTE(review): restored from a damaged scan.  As printed, the second clause
% recurses on Rest, so Z is dropped along with X and '.'; confirm against the
% original whether Z should be kept.
checkphrase([X, '.'], [X, '.'], []) :- !.
checkphrase([X, '.', Z|Rest], Y, Semlist) :-
    number(X), not(number(Z)), checkphrase(Rest, Y, Semlist), !.
% beginning of List is a prefix of a phrase that is a complete finding
% or a phrase in phrasal lexicon
checkphrase(List, [Phrase|Newrest], Semlist) :-
    (   check_sem_finding(List, Rest, Phrase), Sem = [cfinding]
    ;   get_longest_sem(List, Rest, Phrase, Sem)
    ), !,
    %check_sem(List, Rest, Phrase, Sem)), !,
    checkphrase(Rest, Newrest, Sem2), !,
    append(Sem, Sem2, Semlist).
checkphrase([W|Rest], [W|Newrest], Semlist) :-
    checkphrase(Rest, Newrest, Semlist).
checkphrase([], [], []).

% check_sem_finding(+Sent, -Rest, -Phrase)
% Succeeds if Sent begins with a phrase recorded by sem_finding_sent/3 as a
% complete finding; Rest is what follows the phrase.

% first element is itself a list, i.e. the phrase is bracketed already
check_sem_finding([W|Tail], Tail, W) :-
    W = [W1|_],                       % W is bracketed already
    sem_finding_sent(W1, W, _).
% unbracketed: look up phrases indexed by the first word W
check_sem_finding([W|Tail], Sfinal, Phrase) :-
    sem_finding_sent(W, Phrase, _),
    begsublist(Phrase, [W|Tail], Sfinal), !.
% default clause: always fails, so sem_finding_sent/3 has at least one clause
sem_finding_sent(_, _, _) :- fail.

% check_no_sem(+Sent, -Rest, -Phrase): removes Phrase from Sent resulting
% in Rest if Sent begins with a phrase in nosem (non-semantic list).
check_no_sem([W|Tail], Sfinal, Phrase) :-
    nosem(W, Phrase),                  % phrase beg. with W that should be removed
    begsublist(Phrase, [W|Tail], S1),
    remove_comma(S1, Sfinal), !.       % remove comma if it is next
% get_longest_sem(+Sent, -Rest, -Phrase, -Sem): Phrase is longest phrase that
% is a prefix of Sent; Rest is remainder and Sem is list of semantic classes
% (a one-element list holding the class returned by foundword/2).
get_longest_sem(Sent, Rest, Phrase, [Sem]) :-
    setof(X, check_sem(Sent, X), L),   % set of candidate phrases
    maxphrase(L, [], Phrase, 0),       % Phrase with maximum length
    append(Phrase, Rest, Sent),        % rest of sentence after Phrase
    foundword(Phrase, Sem).

% check_sem(+Sent, -Rest, -Phrase, -Sem): checks if a phrase beginning with
% the first word of Sent is in the phrasal lexicon; Rest is the remainder of
% Sent after the phrase; Sem is the semantic class.
check_sem([W|Tail], Rest, Phrase, Sem) :-
    phrasal(W, Sem, Phrase, _),
    begsublist(Phrase, [W|Tail], Rest).
check_sem([W|Tail], Tail, W, Sem) :-
    is_list(W),                        % enclosed in brackets means it is a phrase
    W = [W1|_],
    phrasal(W1, Sem, W, _), !.

% [page break in printed listing: p. 47, WO 00/63687, PCT/US00/10302]

% check_sem(+Sent, -Rest, -Phrase, -Sem, -Target): this also obtains the
% Target form
check_sem([W|Tail], Rest, Phrase, Sem, Target) :-
    phrasal(W, Sem, Phrase, Target),
    begsublist(Phrase, [W|Tail], Rest).
check_sem([W|Tail], Tail, W, Sem, Target) :-
    is_list(W),                        % enclosed in brackets means it is a phrase
    W = [W1|_],
    phrasal(W1, Sem, W, Target), !.

% check_sem(+Sentence, -Phrase) is similar to check_sem/4 except for fewer args
% NOTE(review): the printed body calls check_sem(Sentence,Phrase,_), a 3-argument
% form that is defined nowhere in this listing; it is restored here as the
% check_sem/4 call that the comment above describes -- confirm.
check_sem(Sentence, Phrase) :-
    check_sem(Sentence, _, Phrase, _).
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% file util.pl 

%%%%%%%%%%%%%%%% Utility Predicates %%%%%%%%%%% 

% fail an unknown predicate 
:- unknown(_, fail).

:- op(900, fy, [not, once]).       % same priority and type as \+
% NOTE(review): the operator names are hard to read in the printed listing;
% "\=" and "-=" are the closest readings -- confirm.
:- op(700, xfx, [\=, -=]).         % same priority and type as =

% facts that are asserted/retracted at run time
:- dynamic(wfst/6).
:- dynamic(addstotal/1).
:- dynamic(paragno/1).
:- dynamic(sectno/1).
:- dynamic(phr/4).

% wfst(+Rule, +Number, +Res, +Fmt, +S0, +S): well-formed symbol table
%   Rule   is the name of rule; Number is the option number
%   Res    is s for success and f for failure
%   Fmt    is the format (for successes); for failure Fmt is []
%   S0     is the sentence position at the start of Rule
%   S      is the sentence position when Rule has been completed

% addst(+Rule, +Number, +Res, +Fmt, +S0, +S): add to wfst unless an equivalent
% entry is already there.  Always succeeds.
% NOTE(review): the result marker written as the atom i below is taken
% literally from the printed listing (a third value next to s and f) -- confirm.
addst(Rule, Number, Res, Fmt, S0, S) :-
    \+ checkst(Rule, Number, Res, Fmt, S0, S),   % result for rule was saved already
    \+ checkst(Rule, Number, i, Fmt, S0, S),     % result from different rule saved
    (   checkst(Rule, _, Res, Fmt, S0, S),       % different rule produced same result
        assert(wfst(Rule, Number, i, Fmt, S0, S))
    ;   assert(wfst(Rule, Number, Res, Fmt, S0, S))
    ), !.
addst(_, _, _, _, _, _) :- !.                    % always succeed

% checkst(+Rule, -Number, -Res, -Fmt, +S0, -S): checks to see if rule has been
% saved in wfst (simple lookup of a wfst/6 fact)
checkst(Rule, Number, Res, Fmt, S0, S) :-
    wfst(Rule, Number, Res, Fmt, S0, S).

% beglist(+L, ?Y): is Y the head of list L
beglist([X|_], Y) :- X = Y, !.

% splice(+L1, -L2): L1 is a list of lists; L2 is merged list
% (delegates to append/2 and commits to its first solution)
splice(L1, L2) :- append(L1, L2), !.
%splice( [] , []> ! . 
%splice( [{]] ,U) ! . 
%splice( (X3 ,X) J, 

%splice( [ [] |L1] ,L2) splice (LI , L2) ,! , 

%splice( [CCD |L1) ,L2) splice (LI, L2) , 1 . 

%splice([xUC]31,L) :- splice (X,L) ,! , 

%splice( [L1,L2] ,L3) 

% append(Ll,L2,L3) , !. 

%splice( [X|L1) ,L2) :- 

% splice<Ll,L3) . 

% append (X,L3,L2) , 1 . 

% splicerel(+Finding, +Splicelist, -Spliced)
% Works with relations which have Arg1, ..., Argn.
% It splices a Splicelist in each arg of relation; a Finding that is not of
% the form [rel,X|Args] simply gets the merged Splicelist appended.
% NOTE(review): two stray comments printed in the middle of this clause
% ("same priority and type as \+" and "same priority and type as = or")
% look displaced from the op/3 declarations and are not part of this predicate.
splicerel(Finding, Splicelist, Spliced) :-
    splice(Splicelist, Spl),
    (   Finding = [rel,X|Rest], spliceargs(Rest, Spl, Sp),
        %splice([[rel,X],Sp], Spliced), ! ;
        append([rel,X], Sp, Spliced), !
    ;   %splice([Finding,Spl], Spliced)
        append(Finding, Spl, Spliced)
    ).
% spliceargs(+Args, +Splicelist, -Spliced): splices a list into each element
% of a list, i.e. appends Splicelist to every argument list in Args
spliceargs([], _, []) :- !.
spliceargs([Arg1|Rest], Splicelist, Spliced) :-
    %splice([Arg1,Splicelist], Sarg1),
    append(Arg1, Splicelist, Sarg1),
    spliceargs(Rest, Splicelist, Srest),
    %splice([[Sarg1],Srest], Spliced).
    append([Sarg1], Srest, Spliced).
% list(+L1, -L2)
% [] maps to []; a doubly bracketed singleton [[X]] maps to X; any other list
% is rebuilt element by element.
% NOTE(review): the second clause is reconstructed from a badly printed line.
list([], []).
list([[X]], X).
list([X|L1], L2) :-
    list(L1, L3),
    append([X], L3, L2), !.

% strip(+L1, -L2): removes one extra pair of square brackets, [L] -> L
strip([L], L).

% difflist(+A, +B, -C): B is a suffix of A and C is the difference
difflist(A, B, C) :- append(C, B, A).

% begsublist(?S, +L, ?Rest): S is a sublist at beg. of L if there is a list
% Rest, which when appended to S results in L.
begsublist(S, L, Rest) :- append(S, Rest, L).

% firstword(+S, +Semlist): checks that first element in list S has semantic
% category in Semlist
% single word: category comes from wdef/3
firstword([W1|_], Semlist) :-
    atom(W1), wdef(W1, Sem, _),        % semantic category
    member(Sem, Semlist).
% bracketed phrase: category comes from the phrasal lexicon
firstword([W1|_], Semlist) :-
    is_list(W1), phrasal(W1, Sem, _, _),
    member(Sem, Semlist).
% remove_no_sem(+Sent, -Sfinal): removes phrases from first arg that are in
% nosem - lexicon of non-sem. phrases
remove_no_sem([], []) :- !.
remove_no_sem([W|Tail], Sfinal) :-
    nosem(W, Phrase),                  % phrase beg. with W
    begsublist(Phrase, [W|Tail], S1),  % remove from sentence
    remove_comma(S1, S2),              % remove comma if it is next
    remove_no_sem(S2, Sfinal), !.
% W does not start a non-semantic phrase: keep it
remove_no_sem([W|Tail], Sfinal) :-
    remove_no_sem(Tail, S1),
    append([W], S1, Sfinal), !.
% remove_comma(+S, -T): T is S without a leading ','; otherwise T is S
remove_comma([','|Tail], Tail).
remove_comma(S, S).

% remove_sem(+Sent, -NewSent): Sent is the original sentence, NewSent is
% stripped of all phrases that are defined in lexicon
remove_sem([], []) :- !.
remove_sem(S, NewS) :-
    check_sem(S, Rest, _, _),          % phrase in sent. is in lexicon - remove it
    remove_sem(Rest, NewS), !.
remove_sem(S, NewS) :-
    check_no_sem(S, Rest, _),          % phrase in sent. is in nosem list - remove it
    remove_sem(Rest, NewS), !.
remove_sem([X|Tail], [X|NewS]) :-
    remove_sem(Tail, NewS), !.         % not a phrase, process rest
% remove_words(+Sent, -NewSent): Sent is the original sentence, NewSent
% is stripped of all words that are in lexicon (numbers are dropped as well)
% NOTE(review): the lexicon test is printed illegibly; foundword(X) is the
% closest reading (other clauses in this file use foundword/2 and /3) -- confirm.
remove_words([], []) :- !.
remove_words([X|Rest], NewRest) :-
    (   ( foundword(X) ; number(X) ),  % X is defined in lexicon
        remove_words(Rest, NewRest)
    ;   remove_words(Rest, New), NewRest = [X|New], !   % X is not in lexicon
    ).

% maxphrase(+ListofPhrases, +Maxin, -Maxout, +InitMaxLen) is true if
%   ListofPhrases is a list of multi-word phrases,
%   Maxin      is phrase with maximum words so far
%   Maxout     is phrase with maximum length of phrases in ListofPhrases
%   InitMaxLen is length of initial phrase which is of max. length
% A phrase that merely ties the current maximum keeps the earlier phrase;
% the printed test (Len < InitMaxLen) made the whole predicate fail on a tie.
maxphrase([], Maxin, Maxin, _).        % no more phrases - maximum is same as Maxin
maxphrase([P|Rest], Maxin, Maxout, InitMaxLen) :-
    length(P, Len),                    % length of first phrase
    (   Len > InitMaxLen, !, maxphrase(Rest, P, Maxout, Len)
    ;   Len =< InitMaxLen, !, maxphrase(Rest, Maxin, Maxout, InitMaxLen)
    ).

%%%%%%%%%%%%%%%%%%%%%%%%%%% lexical interface predicates %%%%%%%%%%%%%%%%%%%%%% 

% acclex(?Sem, ?W, +S0, -S): lexical access for the word or phrase at the
% front of S0; S is the remaining input.
%acclex(Sem,W,S0,S) :-
%    outputform(htext), !, acclex1(Sem,W,S0,S).
% first try the lexicon through foundword (acclex2/4) ...
acclex(Sem, W, S0, S) :-
    acclex2(Sem, W, S0, S).
% ... then the semantic/syntactic lexicon; W is not bound by this clause
acclex(Sem, _W, S0, S) :-
    acclexss(Sem, _Syn, _Target, _Features, S0, S).
% acclex1(?Sem, ?Form, +S0, -S)
% check lexicon for word or phrase, Target form is original W
% NOTE(review): heads reconstructed from badly printed lines -- confirm.
% category p, bracketed phrase
acclex1(p, [P,C], [W|Rest], Rest) :-
    is_list(W),
    find_sem_phrase(p, [P,C], W).
% category p, single word
acclex1(p, [P,C], [W|S], S) :-
    atom(W),
    wdef(W, p, [P,C]).
acclex1(Sem, [W], [W|Rest], Rest) :-
    is_list(W),        % if bracketed list, get Sem and Code from phrasal lexicon
    find_sem_phrase(Sem, _, W).
acclex1(Sem, W, [W|S], S) :-
    atom(W),
    wdef(W, Sem, _).

% acclex2(?Sem, -Code, +S0, -S)
% check lexicon for word or phrase. Target form is taken from lexicon
%acclex2(Sem, Code, [W|Rest], Rest) :-
%    is_list(W),   % if bracketed list, get Sem and Code from phrasal lexicon
%    find_sem_phrase(Sem, Code, W).
acclex2(Sem, Code, [W|S], S) :-
    foundword(W, Sem, Code),
    nonvar(Code).                      % protect against lex. error

% find_sem_phrase(?Sem, -Code, +Phrase)
% find a phrase [W|Tail] in lexicon that begins with W and has category Sem
find_sem_phrase(Sem, Code, [W|Tail]) :-
    phrasal(W, Sem, [W|Tail], Code),   % phrase and code beg. with W
    nonvar(Code).

% sem_finding_phrase1(-Code, +S0, -S)
% case where phrase is already bracketed, look up phrase
sem_finding_phrase1(Code, [W|Tail], Tail) :-
    is_list(W),                        % phrase is bracketed
    find_sem_sent(Code, W),
    nonvar(Code).                      % protect against lexical error
% sem_finding_phrase2(-Code, +S0, -S)
% case where phrase is already bracketed, look up phrase
sem_finding_phrase2(Code, [W|Tail], Tail) :-
    is_list(W),                        % phrase is bracketed
    find_sem_sent(Code, W),
    nonvar(Code).                      % protect against lexical error
% phrasal(+W1, ?Sem, ?Phrase, ?Code): succeeds if lexicon contains a phrase
% that is indexed by its first word W1
phrasal(W1, Sem, Phrase, Code) :-
    phrase(W1, Sem, Phrase, Code, _).  % multi-word phrase in lexicon
% added March 15, 1999
phrasal(W1, Sem, Phrase, Code) :-
    semp(W1, Sem, Phrase, Code, _Features).
% lexical definition from marked up input (phr/4 facts asserted at run time)
phrasal(W1, Sem, [W1|Tail], Code) :-
    phr(W1, Sem, Tail, Code).
% acclexss(?Sem, ?Syn, -Target, -Features, +S0, -S)
% Looks up the first element of S0 in the semantic (semw/semp) and syntactic
% (synw/synp) lexicons; its syntactic class must be a member of Syn.
% single word
acclexss(Sem, Syn, Target, Features, [W|S], S) :-
    atom(W),
    semw(W, Sem, Target, Features),
    synw(W, Synclass),
    member(Synclass, Syn).
% bracketed phrase
acclexss(Sem, Syn, Target, Features, [W|S], S) :-
    is_list(W),
    find_phrasess(W, Sem, Syn, Target, Features).
% find_phrasess(+Phrase, ?Sem, ?Syn, -Target, -Features): Phrase is in the
% semantic phrase lexicon (semp/5) and its syntactic class (synp/3) is in Syn
find_phrasess([W1|Tail], Sem, Syn, Target, Features) :-
    semp(W1, Sem, [W1|Tail], Target, Features),
    synp(W1, [W1|Tail], Synclass),
    member(Synclass, Syn).

% find_sem_sent(-Code, +Phrase): lexical definition of a complete finding,
% looked up by the first word of Phrase
find_sem_sent(Code, [W|Tail]) :-
    sem_finding_sent(W, [W|Tail], Code).

% listify(+C, -L): an atom is wrapped into a one-element list, a list is
% returned unchanged; anything else fails
listify(C, [C]) :-
    atom(C), !.
listify(C, C) :-
    is_list(C), !.

% distributemods(-Dist, +Findings, +Lmods, +Rmods, +Type)
% distributes left mods and right mods over list of findings creating
% list of lists of findings with mods
distributemods([], [], _, _, _) :- !.
distributemods(Dist, [D1|Tail], Lmods, Rmods, Type) :-
    distributemods(Dist2, Tail, Lmods, Rmods, Type),   % distributed for remainder
    mergemods(Lmods, Rmods, Allmods),
    frame(D, Type, D1, Allmods),       % Type frame with mods
    append([D], Dist2, Dist).          % combine findings to get list of findings

% fixconj(+Leftmods, +Conj, -NewConj)
% if Leftmods has [certainty,no] (or [certainty,deny]) and Conj = or,
% change Conj to and:
%   "no A or B" = "no A and no B"; "denies A, B, or C" is similar.
% Otherwise the conjunction is returned unchanged.
fixconj(Leftmods, Conj, [rel,and]) :-
    ( member([certainty,no], Leftmods) ; member([certainty,deny], Leftmods) ),
    Conj = [rel,or].
fixconj(_, Conj, Conj).

% write_sentences/1 inputs a PROLOG list and prints out lines
% which are English sentences. No wrapping is done.
write_sentences([]) :- !.
write_sentences([X]) :- write(X), nl.          % special sentence - section name
write_sentences(['<',p,'/','>']) :-
    write('<p/>'), nl.                         % paragraph mark
write_sentences([X|Rest]) :-
    upper_first([X|Rest], [U|Rest]),
    write(U),                  % first letter of first word made upper case
    %write(X),
    (   X = U, chkforpunct(U, Rest), !, write_terms(Rest)   % no space needed
    ;   write(' '), write_terms(Rest)
    ).



% write_sentence/2 inputs a PROLOG list and prints out an English
% sentence wrapped. Idlen is the starting position of the sentence
% in the output.
% uses libraries ctypes, basic, not
% The column argument handed to write_terms/2 is an unevaluated sum; it is
% evaluated there by is/2.
write_sentence([X|Rest], Idlen) :-
    upper_first([X|Rest], [U|Rest]),
    write(U),
    name(U, LU), length(LU, L),
    (   U = X, chkforpunct(U, Rest), !, write_terms(Rest, L+Idlen)
    ;   write(' '), write_terms(Rest, L+Idlen+1)
    ).

% write_list/2 inputs a PROLOG list and prints out a sentence like list,
% wrapped. Idlen is the starting position of the list in the output.
write_list([X|Rest], Idlen) :-
    write(X),
    name(X, LU), length(LU, L),
    (   chkforpunct(X, Rest), write_terms(Rest, L+Idlen), !
    ;   write(' '), write_terms(Rest, L+Idlen+1)
    ).
% write_list(+List, +Idlen, -Idlenout)
% write_list prints out a sentence like list with wrapping if necessary.
%   List     is the list to be printed (nested lists are printed recursively)
%   Idlen    is the column position at start
%   Idlenout is the column position at end
% Column positions are carried as unevaluated sums; > evaluates them.
write_list([], Len, Len) :- !.
write_list([X|Rest], Idlen, Idlenout) :-
    (   atomic(X), write(X),
        name(X, LU), length(LU, L),
        (   L + Idlen > 74, nl, Idlen2 = 1, !      % past column 74: new line
        ;   Idlen2 = L + Idlen, !
        ),
        (   chkforpunct(X, Rest), write_list(Rest, Idlen2, Idlenout), !
        ;   write(' '), write_list(Rest, L+Idlen2+1, Idlenout), !
        )
    ;   is_list(X), write_list(X, Idlen, Idlen2), write_list(Rest, Idlen2, Idlenout)
    ).

% upper_first(+Words, -Words1): Words1 is Words with the first character of
% the first word converted to upper case.
% Only a lower-case ASCII letter is shifted (code - 32); any other first
% character, including a letter that is already upper case, is left alone.
% The printed test is_alpha/1 would also subtract 32 from upper-case letters.
upper_first([X|Rest], [U|Rest]) :-
    name(X, [L|Z]),
    (   L >= 0'a, L =< 0'z, Up is L - 32, !
    ;   Up = L
    ),
    name(U, [Up|Z]), !.

% write_terms/1 writes out a word followed by blank, except for punctuations.
write_terms([]) :- !.
% case where X is end of sentence
write_terms([X|Rest]) :-
    ( X = '.' ; X = ';' ),             % last word of sentence
    write(X), nl, !, write_sentences(Rest), !.
% case where X is interior of sentence
write_terms([X|Rest]) :-
    write(X),
    (   chkforpunct(X, Rest), write_terms(Rest)
    ;   write(' '), write_terms(Rest)
    ), !.

% write_terms(+List, +Used): writes the terms in list and counts the number
% of columns used; starts new line if 75 columns have been used.
% Used may be an unevaluated sum; it is evaluated by is/2 below.
% NOTE(review): several clauses are reconstructed from badly printed lines,
% in particular the R = [] branch and the possessive clause at the end.
write_terms([], _) :- !.
% at end of list
write_terms(['.'], _) :- write('.'), nl, !.
write_terms([;], _) :- write(';'), nl, !.
% X is a punctuation, don't add to final count
write_terms([X|R], Used) :-
    (   R = [], write(' '), write(X), !
    ;   chkforpunct(X, R),
        write(X), write_terms(R, Used), !
    ).

% X is last word in sentence
write_terms([X,'.'], Used) :-
    name(X, List), length(List, Len),
    Need is Len + 2,
    Total is Used + Need,
    (   Total =< 75, write(' '), write(X), write('.')
    ;   Total > 75, nl, write(' '), write(X), write('.')
    ),
    nl, !.

% X is last word in sentence (terminated by ';')
% The printed overflow branch wrote '.' here; it now writes ';' like the
% non-overflow branch.
write_terms([X,;], Used) :-
    name(X, List), length(List, Len),
    Need is Len + 2,
    Total is Used + Need,
    (   Total =< 75, write(' '), write(X), write(';')
    ;   Total > 75, nl, write(' '), write(X), write(';')
    ),
    nl, !.

% X is followed by ','
write_terms([X,','|Rest], Used) :-
    name(X, List), length(List, Len),
    Need is Len + 2,
    Total is Used + Need,
    (   Total =< 75, write(' '), write(X), write(','),
        write_terms(Rest, Total)
    ;   Total > 75, nl, write(' '), write(X), write(','),
        New is Need - 1, write_terms(Rest, New)
    ),
    !.

% writes blank + name of X, used is length of name+1
write_terms([X|Rest], Used) :-
    name(X, List), length(List, Len),
    Need is Len + 1,
    Total is Used + Need,
    (   Total =< 75, write(' '), write(X), write_terms(Rest, Total)
    ;   Total > 75, nl, write(' '), write(X), write_terms(Rest, Len)
    ), !.

% possessive: X followed by apostrophe and s
write_terms([X,'''',s|Rest], Used) :-
    name(X, List), length(List, Len),
    Need is Len + 3,
    Total is Used + Need,
    (   Total =< 75, write(' '), write(X), write('''s'),
        write_terms(Rest, Total)
    ;   Total > 75, nl, write(X), write_terms(Rest, Len)
    ), !.
% testsents(+Infile, +Outfile)
% processes sentences in Infile; writes formats to Outfile
% sentences flagged as comments are skipped by readtests/0
testsents(Infile, Outfile) :-
    see(Infile), seen, see(Infile),    % close and reopen the input file
    tell(Outfile),
    readtests,
    see(Infile), seen, told.
% readtests: reads next sentence and processes it, until end of input.
% For every sentence the tokens and the bracketed form are printed, followed
% by the format (when all words are known) or by the undefined words.
% NOTE(review): the comment marker ('*') and the single-token stop pattern
% (['.']) are the closest readings of badly printed lines -- confirm.
readtests :-
    read_in(X),
    (   X = end_of_file, !
    ;   X = [eoff,'.'], !
    ;   X = ['.'], !
    ;   X = ['*'|_], !, readtests              % don't process comments
    ;   preprocess(X, Bs, Undef, Semlist, skip),
        (   Undef = [],
            dosent(X, Bs, Semlist, Fmt, _Message, impression, _W, chestxray, strict, 0),
            write_sentence(X, 1), write(Bs), nl,
            write(Fmt), nl
        ;   Undef \= [], write_sentence(X, 1), write(Bs), nl, write(Undef), nl
        ),
        readtests                              % read next sentence
    ).

% get_inputsents(+Prevlist, -Toklist)
% Reads in all sentences from input file and creates one list of all sentences
% NOTE(review): the end-marker atom tested with last/2 is unreadable in the
% printed listing (shown as ''), and in that branch Prevlist is not used --
% confirm both against the original.
get_inputsents(Prevlist, Toklist) :-
    read_in(X),
    (   X = end_of_file, Toklist = Prevlist, !
    ;   X = [eoff,'.'], Toklist = Prevlist, !
    ;   X = ['.'], Toklist = Prevlist, !
    ;   (   last('', X), append(Toklist, [''], X), !   % remove
        ;   append(Prevlist, X, Newlist),
            get_inputsents(Newlist, Toklist)
        )
    ).



% get_sentence(+A, -B, -C)
% Gets next sentence from input list containing all sentences read in.
% Don't end a sentence if "." is preceded by a number and followed by
% a number and unit measure - 1.25 cm, 1.5 cm, .5 cm -
% or is followed by a "." which is part of abbreviation.
%   A is list of all sentences in report
%   B is list containing one sentence
%   C is remainder excluding B
% NOTE(review): quoted punctuation atoms and some argument lists are
% reconstructed from badly printed lines; doubtful spots are marked below.

% sgml tag for multi-word phrase containing '.' that is not end of sentence
get_sentence(['<',phr|Tail], Sentence, LRest) :-
    enclosedPart(Tail, phr, Between, Rem),   % Between: part from open phr tag to close tag of phr
    append([sem,'=','"',Sem,'"'], MoreAttributes, Between),   % Sem is value of sem attribute
    (   MoreAttributes = ['>'|Phrase], TargetList = Phrase, !
    ;   MoreAttributes = [t,'=','"'|TargetPlus],      % Target terms plus end of phr
        append(TargetList, ['"','>'|Phrase], TargetPlus), !   % t attribute followed by actual phrase
    ),
    Phrase = [W1|Rest],
    append(Phrase, SRest, Sentence),
    concat_atom(TargetList, Target),
    assert(phr(W1, Sem, Rest, Target)),      % assert lex def according to input
    %Phrase = [W1|PRest],
    %abbrev(W1, [W1|PRest], Target, _),
    get_sentence(Rem, SRest, LRest), !.
% Ignore sentence starting with '%','%' - get next sentence
% NOTE(review): the first call is printed with two arguments only.
get_sentence(['%','%'|Rest], Sent, Remainder) :-
    get_sentence(Rest, _, Rem),
    get_sentence(Rem, Sent, Remainder).
get_sentence([X,'.',Y,Z|Rest], [X,'.'], [Y,Z|Rest]) :-    % break up "140. 3+"
    number(X), number(Y), Z = '+', !.        % Y belongs to '+' for new sentence
get_sentence([X,'.',Y,Z|Rest], [N|SRest], LRest) :-       % 1.5 cm
    number(X), number(Y),
    %(wdef(Z,unit,_) ; Z = x),
    Z \= '+',                                % break up "140. 3+"
    !,
    name(X, D1), name('.', D2), name(Y, D3), name('E+00', D4),
    append([D1,D2,D3,D4], D), name(N, D),    % put number together
    get_sentence([Z|Rest], SRest, LRest).
% common abbrev.
get_sentence([X,'.'|Rest], [X|SRest], LRest) :-           % abbrev ending in "."
    % list of common abbreviations seen in reports should not end sentence
    member(X, [vs,dr,cm,mg]), get_sentence(Rest, SRest, LRest), !.
% list of start of names in reports should not end sentence
get_sentence([X,'.'|Rest], [X|SRest], LRest) :-           % abbrev ending in "."
    member(X, [ms,mr,mrs,dr,st]),
    skipname(Rest, Rest0),                   % skip name part
    get_sentence(Rest0, SRest, LRest), !.
% more known abbreviations
get_sentence([W1|Rest], [Rep|SRest], LRest) :-
    abbrevchk([W1|Rest], _, Rem, Rep),       % abbreviation
    get_sentence(Rem, SRest, LRest), !.
% possible simple xml tag for new paragraph
get_sentence(['<',p,'/','>'|Rest], Sent, Rem) :-          % skip paragraph marker
    get_sentence(Rest, Sent, Rem), !.
% xml tag for sentence '<s>'
% NOTE(review): enclosedPart is printed with three arguments here but with
% four in the phr clause above -- confirm.
get_sentence(['<',s,'>'|Tail], Sentence, Rest) :-
    enclosedPart(Tail, Sent, Rest),
    (   last('.', Sent), Sentence = Sent, !  % already has '.'
    ;   append(Sent, ['.'], Sentence)        % add '.'
    ), !.
get_sentence(['.'|Rest], ['.'], Rest) :- !.  % end of a sentence
get_sentence([;|Rest], [;], Rest) :- !.
% interior of sentence
get_sentence([X|Rest], [X|SRest], LRest) :-
    get_sentence(Rest, SRest, LRest).
get_sentence([], [], []).                    % no more sentences

% abbrevchk(+WordList, -AbList, -RemList, -Target) is true if an abbrev is
% prefix of WordList, RemList is suffix of WordList (excluding prefix),
% AbList is prefix consisting of abbreviation
% and Target is target form of abbreviation
% The list-of-domains case now looks up the current domain itself; as printed
% it tested member/2 with an unbound variable, which accepts any non-empty list.
abbrevchk([W1|Rest], AbList, RemList, Target) :-
    abbrev(W1, AbList, Target, Dom),   % abbrev knowledge base indexed by 1st word
    append(AbList, Rem, [W1|Rest]),    % remainder of abbrev. must be in sentence
    (   Dom = general, !               % abbrev. applies to all domains
    ;   domain(Thisrep), Dom = Thisrep, !      % abbrev. applies to this domain
    ;   is_list(Dom), domain(Thisrep1), member(Thisrep1, Dom)   % this domain in abbrev. list
    ),
    (   % add back '.' to sentence if it also signals end of sentence
        Rem = [], last('.', AbList), RemList = ['.'], !       % no more words
    ;   % words that generally start a new sentence
        Rem = [W2|_], last('.', AbList), member(W2, [his,her,he,she,the,this]),
        RemList = ['.'|Rem], !
    ;   % don't add '.' back
        RemList = Rem
    ).

% skipname(+Beglist, -Endlist): skips next word after "mr" or "st"
% NOTE(review): the two apostrophe patterns are reconstructed from badly
% printed heads, guided by the examples in their comments; the last clause
% had no body in the printed listing and is restored as a fact -- confirm.
skipname([], []) :- !.
skipname([_,'''',s|Rest], Rest) :- !.        % "Luke's"
skipname([o,'''',_|Rest], Rest) :- !.        % "O'Grady"
skipname([_|Rest], Rest).

% get_section(+Toklist, -Sents, -Rest, -Section, -Printname, +Addno)
% Toklist contains input list; 1st sentence should be a header;
% Sents are all sentences in section; Section is name of section
% ([] when the first sentence is not a header; Sents and Rest are then unbound).
% With Addno = 1 the stored section counter sectno/1 is incremented.
% On a header the paragraph and sentence counters are reset.
get_section([T|Toklist], Sents, Rest, Section, Printname, Addno) :-
    % first sentence should be section header
    get_sentence([T|Toklist], Sentence, RToklist),
    (   section_header(Sentence, Rsent, Section, Printname),   % Sentence is a section header
        append(Rsent, RToklist, RToklist2),
        get_sectionsents(RToklist2, Sents, Rest),
        (   Addno = 0, !               % testing if input begins with section header
        ;   Addno = 1, !, sectno(Sectno), Newno is Sectno + 1,
            retractall(sectno(_)), assert(sectno(Newno))
        ),
        retractall(paragno(_)), assert(paragno(1)),    % 1st parag. of section
        retractall(sentno(_)), assert(sentno(0))       % 1st sentence of parag.
    ;   % 1st sentence is not a legitimate header - return []
        Section = []
        % get_section(RToklist, Sents, Rest, Section)  % skip till find header
    ), !.
get_section([], [], [], [], _, _).
% get_sectionsents(+Toklist, -Slist, -Rest): Slist is the concatenation of all
% sentences up to, but excluding, the next section header; Rest starts with
% that header (or is [] at end of input)
get_sectionsents([], [], []) :- !.
get_sectionsents(Toklist, Slist, Rest) :-
    get_sentence(Toklist, Sentence, RToklist),     % one sentence
    (   \+ section_header(Sentence, _, _, _),      % more sentences in section
        get_sectionsents(RToklist, RSents, Rest),
        append(Sentence, RSents, Slist)
    ;   % the next sentence is a section header - return
        Rest = Toklist, Slist = []
    ).

% section_header(+S, -Rest, -Section, -Printname)
% S is a sentence (token list) that starts with a known section heading;
% Rest is what follows the heading, Section the internal section name and
% Printname the heading used for print out.
% NOTE(review): several print names are cut off at the right margin of the
% printed listing and are completed here by analogy with the legible ones
% ('NAME:.'); spacing inside quoted atoms is normalised the same way -- confirm.
section_header(S, RestS, 'report clinical information item',
               'CLINICAL INFORMATION:.') :-
    (   S = [clinical,information,':','.'], !, RestS = []
    ;   begsublist([clinical,information,':'], S, RestS), !
    ;   S = [clininfo,':','.'], RestS = [], !
    ;   begsublist([clininfo,':'], S, RestS), !
    ).
section_header(S, RestS, 'report impression item',
               'IMPRESSION:.') :-
    (   S = [impression,':','.'], RestS = [], !
    ;   begsublist([impression,':'], S, RestS), !
    ).
section_header(S, Rest, 'report summary item', 'SUMMARY:.') :-
    S = [summary,':'|Rest].
section_header(S, RestS, 'report description item', 'DESCRIPTION:.') :-
    (   S = [description,':','.'], RestS = [], !
    ;   begsublist([description,':'], S, RestS), !
    ).
section_header(S, Rest, 'report diagnosis item', 'DISCHARGE DIAGNOSIS:.') :-
    (   S = [discharge,diagnosis,':'|Rest]
    ;   S = [final,diagnosis,':'|Rest]
    ;   S = [principle,diagnosis,':'|Rest]
    ;   S = [associated,diagnosis,':'|Rest]
    ;   S = [transfer,diagnosis,':'|Rest]
    ;   S = [diagnosis,'(',es,')',':'|Rest]
    ;   S = [diagnosis,':'|Rest]
    ), !.
section_header(S, Rest, 'report laboratory data item', 'LAB DATA:.') :-
    S = [laboratory,data,':'|Rest], !.
section_header(S, Rest, 'report medications item', 'MEDICATIONS:.') :-
    S = [medications,':'|Rest], !.
section_header(S, Rest, 'report current medications item', 'MEDICATIONS:.') :-
    S = [current,medications,':'|Rest], !.
section_header(S, Rest, 'report discharge medications item',
               'DISCHARGE MEDICATIONS:.') :-
    S = [discharge,medications,':'|Rest], !.
section_header(S, Rest, 'report discharge disposition item',
               'DISCHARGE DISPOSITION:.') :-
    S = [discharge,disposition,':'|Rest], !.
section_header(S, Rest, 'report medications on admission item',
               'MEDICATIONS:.') :-
    S = [medications,on,admission,':'|Rest], !.
% NOTE(review): 'iterm' is kept exactly as printed.
section_header(S, Rest, 'report medications on transfer iterm',
               'MEDICATIONS:.') :-
    S = [medications,on,transfer,':'|Rest], !.
section_header(S, Rest, 'report procedure item', 'PROCEDURE:.') :-
    (   S = [operation,':'|Rest]
    ;   S = [procedure,':'|Rest]
    ), !.
section_header(S, Rest, 'report indications for procedure item', 'INDICATIONS:.') :-
    (   S = [indications,for,procedure,':'|Rest]
    ;   S = [indications,for,operation,':'|Rest]
    ), !.
section_header(S, Rest, 'report preoperative diagnosis item', 'PREOP DIAGNOSIS:.') :-
    S = [preoperative,diagnosis,':'|Rest], !.
section_header(S, Rest, 'report admitting diagnosis item', 'ADMITTING DIAGNOSIS:.') :-
    S = [admitting,diagnosis,':'|Rest], !.
% NOTE(review): printed as "post operative"; read as the single token
% postoperative by analogy with preoperative -- confirm.
section_header(S, Rest, 'report postoperative diagnosis item', 'DIAGNOSIS:.') :-
    S = [postoperative,diagnosis,':'|Rest], !.
section_header(S, Rest, 'report physical examination item',
               'PHYSICAL EXAM:.') :-
    S = [physical,examination,':'|Rest], !.
section_header(S, Rest, 'report chief complaint item', 'CHIEF COMPLAINT:.') :-
    S = [chief,complaint,':'|Rest], !.
section_header(S, Rest, 'report hospital course item', 'HOSPITAL COURSE:.') :-
    S = [hospital,course,':'|Rest], !.

% [page break in printed listing: p. 58, WO 00/63687, PCT/US00/10302]

section_header(S, Rest, 'report allergy item', 'ALLERGIES:.') :-
    S = [allergies,':'|Rest], !.
section_header(S, Rest, 'report follow up item', 'FOLLOW UP:.') :-
    S = [follow,up,':'|Rest], !.
section_header(S, Rest, 'report findings item', 'FINDINGS:.') :-
    S = [findings,':'|Rest], !.
section_header(S, Rest, 'report indications and findings item', 'FINDINGS:.') :-
    S = [indications,and,findings,':'|Rest], !.
section_header(S, Rest, 'report indications and findings item', 'INDICATIONS:.') :-
    S = [indications,':'|Rest], !.
section_header(S, Rest, 'report provisional diagnosis item', 'PRELIM DIAGNOSIS:.') :-
    S = [provisional,diagnosis,':'|Rest], !.
section_header(S, Rest, 'report review of systems item', 'REVIEW OF SYSTEMS:.') :-
    S = [review,of,systems,':'|Rest], !.
section_header(S, Rest, 'report past history item', 'PAST MEDICAL HISTORY:.') :-
    S = [past,history,section,':'|Rest], !.
section_header(S, Rest, 'report past history item', 'PAST MEDICAL HISTORY:.') :-
    S = [past,medical,history,':'|Rest], !.
section_header(S, Rest, 'report social history item', 'SOCIAL HISTORY:.') :-
    S = [social,history,':'|Rest], !.
section_header(S, Rest, 'report past history item', 'PAST MEDICAL HISTORY:.') :-
    S = [history,':'|Rest], !.
section_header(S, Rest, 'report past history item', 'PAST MEDICAL HISTORY:.') :-
    S = [brief,history,':'|Rest], !.
section_header(S, Rest, 'report history of present illness item',
               'HISTORY OF PRESENT ILLNESS:.') :-
    S = [history,of,present,illness,':'|Rest], !.
section_header(S, Rest, 'report history of present illness item',
               'HISTORY OF PRESENT ILLNESS:.') :-
    S = [history,of,the,present,illness,':'|Rest], !.
section_header(S, Rest, 'report specimen item', 'SPECIMEN') :-
    S = [specimen|Rest], !.

% isidentifier(+Sentence)
% sentence consists of id number only or "." only.
isidentifier([X,'.']) :-
    integer(X).
isidentifier([X,;]) :-
    integer(X).
isidentifier(['.']) :- !.              % sentence consists only of '.'
isidentifier(['.','<eos>']) :- !.
% paragraph marker sentence - update paragraph no. and reset sentence no.
isidentifier(['<',p,'/','>']) :-
    paragno(N),
    retractall(paragno(_)),
    Newno is N + 1,
    assert(paragno(Newno)),
    retractall(sentno(_)),
    assert(sentno(0)).

% skipsentence(+Sentence) is true, if sentence should be ignored.
% Skip sentences whose first word is family or insurance info
skipsentence([X|_]) :-
    foundword(X, family), !.
skipsentence([X|_]) :-
    foundword(X, insurance), !.

% skipsentence(+Sentence, +Semlist, -Error)
% This occurs if sentence contains a sequence in skips database;
% Error tells whether the sentence also contains findings.
% NOTE(review): the call printed as "(subtype Semlist)" is restored as
% subtype(_, Semlist) -- confirm.
skipsentence([X|Rest], Semlist, Error) :-
    skips([X|Sseq]),                   % X is the beg. of subseq. in skip database
    prefix([X|Rest], [X|Sseq]),        % sentence contains subseq.
    (   subtype(_, Semlist),           % sentence contains information to be extracted
        Error = no                     % don't try to segment
    ;   Error = yes                    % treat sentence as error and try to segment.
    ).
skipsentence([_|Rest], Semlist, Error) :-
    skipsentence(Rest, Semlist, Error).

% findingseg(+S, -Fseg, -Begseg): partitions sentence
%   S is the sentence; Begseg is the segment preceding the
%   modifiers of the finding; Fseg is the segment of S starting
%   with the leftmost modifier of the finding and consists of the
%   remaining sentence.
% NOTE(review): the final append/3 is outside the disjunction, as printed; in
% the first two branches Modseg is still unbound when it runs.  The fallback
% clause is reconstructed from a truncated line -- confirm.
findingseg(S, Fseg, Begseg) :-
    partition(S, Begpart, Restpart),
    (   Begpart = [], Begseg = []
    ;   Restpart = [], Fseg = [], Begseg = S
    ;   rightlstmod(Begpart, Begseg, Modseg)
    ),
    append(Modseg, Restpart, Fseg).
findingseg(_, [], []) :- !.

% actionfindingseg(+S, -Fseg, -Begseg): like findingseg/3, but the finding
% segment is extended to the left back to the nearest word accepted by
% findsubstance/2 (searched in the reversed leading part).
% NOTE(review): in the first branch Fseg is left unbound, as printed; the
% fallback clause is reconstructed from a truncated line -- confirm.
actionfindingseg(S, Fseg, Begseg) :-
    partition(S, Begpart, Restpart),
    (   Begpart = [], Begseg = []
    ;   Restpart = [], Fseg = [], Begseg = S
    ;   reverse(Begpart, ReversedBefore),
        findsubstance(ReversedBefore, Rest),
        append(Substancepart, Rest, ReversedBefore),
        reverse(Substancepart, Leftpart),
        reverse(Rest, Begseg),
        append(Leftpart, Restpart, Fseg)
    ).
actionfindingseg(_, [], []) :- !.
% findsubstance(+List, -Rest): Rest is what follows the first word X of List
% for which substance(_, [X], []) holds; [] if there is none
findsubstance([], []) :- !.
findsubstance([X|Rest], Rest) :-
    substance(_, [X], []), !.
findsubstance([_|Rest1], Rest) :-
    findsubstance(Rest1, Rest).

% partition(+S, -Begpart, -Restpart): partitions sentence
%   S is initial sentence; Begpart is part of sentence before the
%   finding; Restpart is the rest of the sentence and starts with
%   the finding. If there are 2 consecutive findings
%   the 1st one is considered a modifier
partition([], [], []) :- !.
% X is not a finding: it belongs to the leading part
partition([X|Rest], [X|Begpart], Restpart) :-
    not(isfinding(X)), !, partition(Rest, Begpart, Restpart).
% two findings in a row: the first one goes to the leading part
partition([X,Y|Rest], [X], [Y|Rest]) :-
    isfinding(X), isfinding(Y), !.
partition([X|Rest], [], [X|Rest]) :-
    isfinding(X), !.

% isfinding(+X): is true if X is a word or phrase whose semantic class
% is a finding or subtype of finding.
% NOTE(review): the second goal is printed as "subtype tSem])" and restored
% as subtype(_, [Sem]), the form used in rightlstmod/3 -- confirm.
isfinding(X) :-
    foundword(X, Sem),                 % semantic class of word
    subtype(_, [Sem]).                 % is class a type of finding, recommend, or technique

% subtype(?Type, +Sem): Sem is a list of semantic classes; succeeds when
% Sem contains a class belonging to the relevant-information type Type.

% semantic classes which are types of relevant information
subtype(finding, Sem) :-
    intersect(Sem, [attach, createbond, breakbond, activate,
                    inactivate, substitute, transcribe, express, promote,
                    signal]).

% there is only one type of technique class
subtype(technique, Sem) :-
    member(technique, Sem).
subtype(time, Sem) :-
    intersect(Sem, [status, sstatus, change, tmper, vstatus]).
% findinginlist(+Sem): succeeds when intersect/2 holds between the class
% list Sem and the list of finding classes (same list as subtype(finding, _)).
findinginlist(Sem) :-
    intersect(Sem, [attach, createbond, breakbond, activate,
                    inactivate, substitute, transcribe, express, promote,
                    signal]).

% chkforpunct(+W, +Rest): is true if there should be no space after word W 
% NOTE(review): the clauses below are damaged in this scanned copy. The 
% member/2 character list of the first clause is truncated and cannot be 
% recovered from the visible text; restore it from the original source. 
% The remaining clauses presumably read 
%   chkforpunct(_, []) :- !.   and   chkforpunct(_, [W|_]) :- ispunct(W). 
% -- confirm before use. 
chkforpunct (W,_) : - member (W, C '/' 1 * , 

% nothing left to write, 
chkforpunct {W, [] ) :-!. 

% is true if there should be no space before word after current word 
chkforpunct (_, [w I _] ) 
ispunct (W) . 

% ispunct(+W) is true if W is a punctuation for sentence print out 
% The following characters are not treated as punct: 
% NOTE(review): the list of excluded characters that followed the line above, 
% and the member/2 punctuation list below, are unreadable in this scanned 
% copy and cannot be reconstructed; restore both from the original source. 

ispunct (W) : - member (W, [•,»,',•,•;•,'/',•<',•>',•?'/*•••*•-'»•:'.""*' t ^' 3 ' * 

\ + = M 'X'' • 

% rightlstmod(List, Firstpart, Modpart): Modpart begins with the first
% word in List which is a modifier; Firstpart are the preceding words
rightlstmod([], [], []) :- !.

% X is a modifier or finding; Beginning part is empty
rightlstmod([X|Rest], [], [X|Rest]) :-
    foundword(X, Sem, Target),
    (   modifier(Sem)
    ;   Sem = p, Target = [_, conn]
    ;   subtype(_, [Sem])
    ), !.
% X is not a modifier or finding
rightlstmod([X|Rest], [X|Firstpart], Modpart) :-
    rightlstmod(Rest, Firstpart, Modpart).

% frame(Frame, Type, Value, Mods): creates a list Frame, whose 1st
% element is Type, 2nd element is Value, and 3rd is a list of
% modifier frames or is empty

% Case where modifier list is empty; Value should be atom except for
% certain types;
frame([Type, Value], Type, Value, X) :-
    (X = []; X = [[]]),
    atom(Value), !.
% Special cases where value of type should be a list
frame([Type, [H|R]], Type, [H|R], X) :-
    (X = []; X = [[]]),
    oklist(Type), !.

% Modifier list is merged with list consisting of Type and Value
frame(Frame, Type, Value, Mods) :-
    atom(Value),
    append([Type, Value], Mods, Frame), !.
% Value is a list: its head is the value, its tail is prepended to Mods
frame(Frame, Type, [H|R], Mods) :-
    is_list(R),
    append(R, Mods, NewMods),
    append([Type, H], NewMods, Frame), !.
% Components of Frame
frame([Type, Value|Mods], Type, Value, Mods) :- !.

% Value of Type should not be a list; first element of value is real value
frame([Type, H, Rest], Type, [H|Rest], []) :- !.

% Special cases where value of type should be a list
%frame([Type, [H|R]], Type, [H|R], []) :-   % repeated from rule above
%    oklist(Type), !.

% Value of Type should not be a list; first element of value is real value
frame(Frame, Type, [H|Rest], Mods) :-
    mergemods(Rest, Mods, NewMods),
    append([Type, H], NewMods, Frame).

% mergemodinf(-F, +Frame, +Mods): Frame is a type-value-mod frame; Mods
% is an additional set of modifiers for Frame; mergemodinf adds Mods
% to Frame, resulting in F.
mergemodinf([], [], _) :- !.
% relation frame: keep [rel, X] and merge Mods into the arguments that follow
mergemodinf(F, [rel, X|Rest], Modrel) :-
    mergemodinf(F1, Rest, Modrel),
    append([rel, X], F1, F), !.
% plain frame whose first element is an atom: merge Mods with its own modifiers
mergemodinf(F, [F1, X|Modfin], Modrel) :-
    atom(F1), mergemods(Modrel, Modfin, Mod),
    append([F1, X], Mod, F), !.
% list of frames: merge Mods into the first frame and into the remaining ones
mergemodinf(F, [H|R], Modrel) :-
    mergemodinf(F1, H, Modrel),
    mergemodinf(F2, R, Modrel),
    append([F1], F2, F).

% addmodstof(+Args, +Mods, -NewArgs) is true if Args is a list of formats,
% Mods is a list of modifiers and NewArgs is a list of formats where Mods
% has been added to modifier list of that format
addmodstof([], _, []) :- !.                       % no more formats
addmodstof([Format1|Rest], Mods, [F1|NewRest]) :-
    mergemodinf(F1, Format1, Mods),               % merge modifiers into 1st format
    addmodstof(Rest, Mods, NewRest), !.           % add modifier to remaining

% oklist(+Type): is true if Type can have a list as its value
oklist(unitval).
oklist(age).
oklist(measure).
oklist(prev_timeunit).
oklist(future_exam).

% mergemods(+Mods1, +Mods2, -Mod): Mods1 and Mods2 are a list of modifier lists
% Mod is the merged list; some elements of Mods1 and Mods2 may be
% empty
mergemods([], M, M) :- !.
mergemods(M, [], M).
% general case: drop empty modifiers from both lists, then concatenate
mergemods(Mods1, Mods2, Mod) :-
    delete(Mods1, [], M1),
    delete(Mods2, [], M2),
    append(M1, M2, Mod).

% addmod(+Mod, +Modlist, -NewMod): NewMod is formed by including
% Mod into Modlist
addmod([], Mod, Mod) :- !.          % empty modifier: list is unchanged
addmod(Mod, [], [Mod]) :- !.        % empty list: result holds just Mod
addmod(Mod, Modlist, NewMod) :-
    append([Mod], Modlist, NewMod). % otherwise Mod is put in front
% modlist(+ListofMods, -Mods): ListofMods is a list consisting of
% individual modifier frames, some of which may be empty
% Mods is formed as a list of non-empty modifiers
modlist([], []) :- !.

% ignore a modifier which is an empty list
modlist([[]|R], Mods) :-
    modlist(R, Mods), !.
% first element is a single modifier frame (its head is an atom)
modlist([[H|R1]|R2], Mods) :-
    atom(H), !,
    modlist(R2, Rmods),
    addmod([H|R1], Rmods, Mods).
modlist([[H|R1]|R2], Mods) :-
    is_list(H), !,          % first element is a list
    modlist(R2, Rmods),
    mergemods([H|R1], Rmods, Mods).

% bpframe(-F, +Rel, +Type, +Bp1, +Bp2): creates frame for sequences of
% bodyloc/region/position
% NOTE(review): this predicate was heavily damaged in the scanned copy.
% The anonymous third argument in the first and last clauses and the ';'
% separators between the three alternatives of the second clause are
% inferred from the surrounding structure -- confirm against the original.
bpframe(F, [], _, F, []) :- !.       % only 1 bodyloc

bpframe(F, [], Type, Bp1, Bp2) :-    % no conj relation but more than 1 bodyloc
    frame(Bp1, Bp1Type, Bp1Val, Bp1Mods),             % contents of Bp1 frame
    frame(Bp2, Bp2Type, Bp2Val, Bp2Mods),             % contents of Bp2 frame
    (   (Bp1Type = region; Bp1Type = position),
        Bp2Type = bodyloc,                            % 'left lung', 'area of lung'
        mergemods(Bp1Mods, Bp2Mods, BpMods),          % new region modifier
        frame(NewBp2Mods, Bp1Type, Bp1Val, BpMods),   % new Bp1 frame w new mod
        frame(F, Bp2Type, Bp2Val, [NewBp2Mods])       % main frame is bodyloc
    ;   Bp1Type = bodyloc, Bp2Type = bodyloc, Type = main,   % Bp2 is main
        mergemods(Bp1Mods, Bp2Mods, BpMods),          % new bodyloc modifier
        frame(NewBp2Mods, Bp1Type, Bp1Val, BpMods),   % 'joint of shoulder'
        frame(F, Bp2Type, Bp2Val, [NewBp2Mods])       % main bp frame is shoulder
    ;   mergemods(Bp1Mods, Bp2Mods, BpMods),
        frame(NewBp1Mods, Bp2Type, Bp2Val, BpMods),   % 'shoulder joint'
        frame(F, Bp1Type, Bp1Val, [NewBp1Mods])       % main bp frame is shoulder
    ), !.

bpframe(F, Rel, _, Bp1, Bp2) :-      % Rel is a conj relation joining Bp1 and Bp2
    Rel = [rel, Conj|_], Bp2 \= [],
    mergemods([Bp1], [Bp2], Conjargs),
    frame(F, rel, Conj, Conjargs).

% getrelation(?R, +F1, +F2, -F): combines frames F1 and F2 into F.
% When F2 is [], F is F1.  Otherwise F is the concatenation of R with the
% arguments of F1 and F2: a frame of the form [rel, Conj|Args] whose Conj is
% ',', or, or and contributes its Args (for F1, R must then also be
% [rel, Conj] with Conj one of these); any other frame contributes itself
% as a single argument.
getrelation(R, F1, F2, F) :-
    (   F2 \= [],
        (   F1 = [rel, Conj1|Rest1], R = [rel, Conj],
            (Conj1 = ','; Conj1 = or; Conj1 = and),
            (Conj = ','; Conj = or; Conj = and)
        ;   Rest1 = [F1]
        ),
        (   F2 = [rel, Conj2|Rest2],
            (Conj2 = ','; Conj2 = or; Conj2 = and)
        ;   Rest2 = [F2]
        ),
        %splice([R, Rest1, Rest2], F)
        append([R, Rest1, Rest2], F)
    ;   F2 = [], F = F1
    ).
% uptotal: increments the counter stored in the addstotal/1 fact by one.
% Fails (leaving the counter unchanged) when no addstotal/1 fact exists or
% the stored value is greater than 50.
uptotal :-
    addstotal(X),
    X =< 50,
    NewX is X + 1,
    retractall(addstotal(X)),
    assert(addstotal(NewX)), !.
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A ppendix p 

#!/usr/bin/perl
#Scan.pl: Scans blast output
#Author: Michael Krauthammer
#Copyright: c.1999, Columbia University
#
# Reads a BLAST report line by line, remembers the fields of the current hit
# (target name, gi number, semantic class, subject length, number of
# identities, query start) and prints one '|'-separated record per hit whose
# identities/length ratio exceeds 0.9.
#
# NOTE(review): this listing was restored from a damaged scan; the exact
# spacing inside the regular expressions, the field separator in the print
# statement and the spelling of the input path should be confirmed.

#Variables

#blast input/file
$input_file = "genebank.result";
#program output
$output_file = "match.txt";

#open datastream for file which contains blast output

open(INPUT, '/storage/psi-blast/MarkIt/programs/markit.result');

while ($line = <INPUT>) {

    # hit header: >gi|<number><rest>.<semantic class>,<target>
    if ($line =~ /\>gi\|(\d*)(.*)\.(.*)\,(.*)/) {
        $target = $4;
        $gi = $1;
        $semantic_class = $3;
    }

    if ($line =~ /Length = (.*)/) {
        $length1 = $1;
    }

    if ($line =~ /Identities \= (\d*)\//) {
        $length_actual = $1;
    }

    if ($line =~ /Query: (\d*)/) {
        $start = $1;
    }

    #print if Subj 1, sometimes match 2 or 3 line long

    if ($line =~ /Sbjct: 1 /) {
        if (($length_actual / $length1) > .9) {
            print $target, "|", $start, "|", $start + $length1, "|", $semantic_class, "|", $gi, "\n";
        }
    }
}
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Appendix 

#!/usr/bin/perl
#nucleotide_text_parser.pl
#Author: Michael Krauthammer, c.1999 Columbia University
#
# Reads the text file named by the first command-line argument, reads the
# match records (name|start|end|semantic_class|gi), keeps the matches that
# begin and end on a word boundary, and writes the text to result.txt with
# every kept match wrapped in a <phr ...> ... </phr> tag.
#
# NOTE(review): this listing was restored from a damaged scan.  Operators
# and punctuation that were unreadable are reconstructed from context; the
# spots that could not be confirmed are marked below.

open(INPUT, $ARGV[0]);

#read uncoded input text line by line (chop it)
$all = '';

while ($line = <INPUT>) {
    $all = $all . $line;
}

# NOTE(review): file name reconstructed ("markitII.result") -- confirm.
open(INPUTII, '/storage/psi-blast/MarkIt/programs/markitII.result');

open(OUTPUT, '>result.txt');

#first part: check matches, store positions

while ($line = <INPUTII>) {

    ($name, $start, $end, $semantic_class, $gi) = $line =~ /(.*)\|(.*)\|(.*)\|(.*)\|(.*)/;

    #divide by 4 (4 letter code)
    $start = ($start - 1) / 4;
    $end = ($end - 1) / 4;

    #get substring
    if ($start != 0) {
        $letters = substr($all, $start - 1, $end - $start + 3) . "|";
    } else {
        $letters = ' ' . substr($all, 0, $end + 2) . "|";
    }

    ($letter_beginning) = $letters =~ /(.)/;
    $letter_end = substr($all, $end, 1);
    $letter_endII = substr($all, $end, 2);

    #ignore matches that are in the MIDDLE of sentences, allow plurals
    $letter_beginning =~ tr/[A-Z]/[a-z]/;
    $letter_end =~ tr/[A-Z]/[a-z]/;

    if ((!($letter_beginning =~ /[a-z]/)) && ((!($letter_end =~ /[a-z]/)) ||
        ($letter_endII =~ /s /))) {

        #make sure only the first occurence is stored at this position
        # NOTE(review): '==' is a numeric comparison here, kept as printed.
        if ($save{$start} == '') {
            $save{$start} = $end . '|' . $semantic_class . '|' . $gi;
        }

        # a stored match that starts earlier and ends later contains this
        # one, so this position is marked 'null'
        foreach $key (keys(%save)) {
            ($end_key) = $save{$key} =~ /(.*)\|/;
            if ($end_key > $end) {
                if ($key < $start) {
                    $save{$start} = 'null';
                }
            }
        }
    }
}

#second part: print out marked up document
sort(%save);

for ($i = 0; $i < length($all); $i++) {

    # NOTE(review): '!' binds tighter than '==', so the first test is a
    # numeric comparison of the negation; kept as printed.  The pattern of
    # the second test was unreadable and is assumed to be /\|/ (true only
    # for real 'end|class|gi' entries, not for 'null').
    if ((!$save{$i} == 'null') && ($save{$i} =~ /\|/)) {

        ($end, $semantic_class) = $save{$i} =~ /(.*)\|(.*)\|/;
        # NOTE(review): the closing part of the opening tag was unreadable;
        # '">' is assumed.
        print OUTPUT '<phr="' . $semantic_class . '">';
        $store = substr($all, $i, $end - $i);
        print OUTPUT $store;
        print OUTPUT "</phr>";
        $i = $end - 1;

    } else {
        $store = substr($all, $i, 1);
        print OUTPUT $store;
    }
}
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BASE COUNT 4 05 ft 545- c 493 9 276 t € OtHers 

ORtGIN 

1 CAqcc^&agc amgcaaaaat tcttcf^agga gctgA^'^A^Q ■ agoctg^acg C^t^ccctga 
61 ggayttotgt cggcacsagg tgctgDcccd gc-tpctgacc gccttCfllftgt tc^gcaatga 
L2I tgg^^gcG^tt .^tcctcscg^ -coctct:tc«« ggtgggc^ag ttcctgagcg ctgaggagta 
lei tcagcogaag atcatccctg* tggtggtcaa gatgttctc* tccactgacc ^g^CCfetgcg 
241 catccgcctc ctgcagcaga tggagcagtt catct»i9t*c ctfcgacgagc- JC*ft«gtC«« 
301 cacccagatc t;tccGCcacg tc^tacatgg cttcctggac aQCvliCCCt'g .ccatccgggs 
3€I Qcagac^gtc B&gtee:&t9C t^ct cctggc cccaaagctg Aacgaggoca acctcaatgt 
421 ggagctgatg aagcact^ttg cacggctaca ggCCOeggat ^aacag^^ec ceat-^cgc^g 
4 SI <«acaccac» gtctgcctg^ gc^aaategg ctcctaccte agigctiagca ccagacacag 
541 ggtccttfiCG tctgeettca Qeegagecac tagg^acccg tttgcaccgt cc«ggfltt^< 
601 gggtfictcctg ggcttitgctg .ccacccacaa cctctactca atga|iC9«ct gtgceca^aa 
663 gatcct^ee^ ^tgct«te«g gtcteaetgr agatrcctgag «aat:cdgtgc gagaccaggc 
121 cttcaaggcm wttcggagc^ tcctigtccaa at^ggagtct gtgtc:gge(g^ a^^ccgaoeca 
761 gctggaggaa gtgr^agaagg .a&^tcc^tgc agcctccag^ ^ctggoatgg gaggagccgc 
041 agct:agetgg gcagget^g^ cgt.gaccggg gtctccte^c t.ca.cctccaa gctgat^egt 
901 tcgcacGcaa ccactgcccc aacagaaacc a«CB.t:tCGcc aaagftcecae geeTgaagga 
9ei .gttcct^ecc cageccecib-C e«:cfcgttcct gccacccct* caacctcagg ccactgggag 
1^21 acg^^aiggagg aggacaagga cacagca^gag g«c»gcagca ctgctge«i&g atgggaegac 
1001 gaagact^gg geagectgg'a gcag^aggce gag^ctgtgc t.ggcccagca • 9g«cg»ct^g 
1141 a gca c^^^gg g ccb agngag * ccg t get agt ca gg-t ^ca act cc ec« ca.aa t ccr c e 
1201 »«ift'Cccccflig ^gtccgactg gag<^Bgctgg gaaxctgagg gct.ect.ggga aoagggct.gg 
1261 caggagc^BB gcteccagga gccacjctyct .gacggtflcac ggotggcci»v *^*gt»taae 
132^1 tggggtggcc cagagtcc^g cqacaagggc ga^fC-ce^tcg ctaCCCteft-e tgcacqtccc 
1391 9gcBccc«gc cg^iggccagc ctcttg^ggt geggacaact gggagggcct ^gagactgac 
144 i agtcgac^gg- rca^ggctga ^^^tggceegg aagaagcgcg aggagcggcg gCgggagatg 
iSOl gag^ceaaac ^cgccgagag gaaggtgcca. agg^^^cccat gaagctggga gcccggaagc 
lS-61 tggft^ctg#*c cgtggcggt^ gcccttc<?cg cj-^tgcggaga ^cccgcccca cagatgtatt 
1621 tattgt»eB.a accBtgtgag cccggccgcc cagc^^aggcc at^ctcAcgtg taCQt«ft1tca 
I6BI gagccacaa^ aaattct«t;t tcacaaaaa^ aaaaaaaaaa aaaaaaa 
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>sp|P155331RFn MOUSE DOWN REGULATORY PROTEIK 
OF INTERLEUKIN 2 RECEPTOR (J03776) rpt-lr [Mus 
musculus] Length «= 353 

Score - 92,0 bits (237), Expect = 6e-20 

abiut 4 \a*MIKEEVTCPICiCLtt«iV5M?CKH5rCFRClTli««-SWWP DGKGHCFVCHV «0 

CU«ry eT^SATGINSLtTVWYfiLKSTVXfOfWKlKISt^ t^ffVCK5f«i3CPL«I FCLTDW&lilCC 3il 

OuvTY icA7iv#eKrE«vrc:si£cu.yAo^nciKrz&trQ(fir ci:wiotikl8RI£iiket9k "^dd 

Sb)et 117 X<C£n5gSKIUp}mALI[XVD0EVKEia^QGIU.ViaMI^^ — 1?1 

abjct 172 ZJ<3QXOI->-NVE]n?Q(L0n<CLRI!3aJI5XEN&ei^KIJC^ 222 



Homology covers ring finger, B-box dud the begioAmg of ooiled coil domain 
in the CLL rbg fixkger protfiin 
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Activated CIM'^T^cdls 

Rptl (f epiesscs expression of IL-2 receptor) 

liLr«.eptor ► „onMlexpT™nofBcl2 

1 1 

1Lr2y Jt'-lS noonal apofrtosis 



When rptl is knocked out: 




3 



i 

IL-2r<«ptoi ► ow«pr«»tot.ofBcl2 
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gblM^'tft^SQl Wre^e^ t«71«Q£,tl Hct^CG^P^tiC^l Home «k^Ulvf «DN9l oicmft IKPfiCilDaUQ 

MCPtASTflavxUMCFIi^fWCllEftEHGiyAflLCrH aP4 4 RR^K ^ O^Kh ♦'le^^ 

St^ct : VHNe:L^Rf»olJCFCXVlLK0Qrt21«6DCAArTTl,rLLRIUlWH70ll^ 41& 

St^ct: 41 ( ERLRT IJD 

«MICO24O7(C0240l ifWCSODl 22794 Huw q4nf 94qn>tu'c, -direct^ CBHA J«QUc/le«. 
length ^ S4« 

SCOT* - dt.S blttf (339J. Expect » «*-?a 

rrAme - *J 

Bdjct: 4^ KlC»>»'Gn3i»1QLR8XMmRKRLRAOSLDCdfiUSER^ ^24 

Quiryi I0« 1^7 
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aftSfi COUNT 130 B c 2&3 g 106 t 5 Other* 

DRIGIK 

1 cagccgcttg ctccggccgg cAccctJi^gc c9C«^t<e9C eaggctigticg ccgacatg^ft 
61 accctt^gcc •.gcn^tttcc- eggtcct^ct gcaggcggcc gagttc^-tgg ^gc^ccflftga 
121 ^agA^&^rgcc gagcstQgtt atigcgtccct gtgcccgcat Cgcug^^cftf) gcccce^ccd 
IBl t^Aggaggaag aagcgacccc- cccaggctco tg^c^c^^^g gacagcgggc g^tcagtgca 
241 caat.g&4ct;9 gAgaagcgc« ggagggcccA gt.t9«»^t=99 ngcctiggagc ggctgaagc* 
301 gc»96tflecc et9g9C9gfCg aetgtgcccg gt a caeca eg .ctg»^cctgc tjicgccgtgc 
eag^atgcac atccag&agc tggagg^itca ggagcagc^g ^ce^gacagc t^caaggagag 
4,21 gct^cgca^clL' 0«gcagci9» gcctgcagcg gc.&nt.ggat:g oagctccsgg ggctggcK.gg 
4Q) ngcggcc^ag cgggagcgnc tgcgggcgga cagtctgi)liC> tec^eagg^c t.&t.cc^ctga 
511 gcgctc«9«c tcagaccaag aggagctgga ggtgf^^tgtg gagagcctgg ^gittggggg 
601 tgoggcegag ctgctgc^igg gcttegtcge eggccaggag cacagct^et «gc«cgteg9 
€&1 eg^cgcctgg cCatgatigcL cctcacccan ggcgggcctc tgccctctt.a ot:egt.tgccG 
721 aagccceictt tnc 
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% lexdemsub . pj. 

% lexdemsub.pat

% revised March 17, 2000

% LEXICON OF SUBSTANCES AND STRUCTURES

% phrase/5 and wdef/3 may have clauses in several lexicon files;
% calls to undefined predicates fail instead of raising an error.
:- multifile(phrase/5).
:- multifile(wdef/3).
:- unknown(_, fail).

phrase ( ' [ * rprotein^ [ ■ [ ' .ganuna, ' ] ' , ' - ' , aminobutyric, acid, a] , ' GA 
BAA^ r r} . % ? 

phraee ( ' [ ' . ^wiallniolecul^, [ M ' , zeta, ) * , 1 , gubunit] , ' [zeta] 1 subu 
nif r r> . % ? 

phraee(ll6, protein, fll6, •-•,kd,fyn, - • ,aaaociated. protein] , UlS-k 
D Pyn-aasociatGd protein'^,r> . 

phrase {lis, protein, f.ll 6, kd, protein] , 'llS-kd prot-ein',rK 

phrase {3 rpt^c^tein, [3, ' - ' .kinase, * - ' ,akt.l , ' 3-kinaa6-Akt » ^r) . 

phra&e (ability , af f imnacion^ [ability, to], [J r r) . 

phrase (age, prot&ia, [age, protein, kinases], 'AGC , r) . 

ph ra a e (akt , protein r (akt, mutant], 'Akt mutant', r? . 

phrase (alternativer substance, [alternative, ntf] , 'alternative NTF' , r 

) . 

phrase (antibody, protein, (antibody r to. phosphotyrosine] , 'anti-phosp 
hotyrosine' , r) - 

phrase (antigenr complex, [antigen, receptor] , 'antigen receptor' , r) . 
phrase { ap r protein , [ap, ' - ' , 1] , ' AP- i ' , r ) . 

phrase (aspargine^site, [aepargine, ^ - ' , 1411 r ' aBpargine-141 ' ,.r) . 
phrase (b, cell, Cb^celUr 'B cell', r} . 
phraae(br cell, Cb.cell^], 'Bcell', x) . 

phraaeCb, E^peciea, [b, lymphoblaatoid, i^ell^l j lym^^hobl&atoid cell 
B',r). 

phrase (b. cell, lb, lymphoblaetoid, eel Is] ^ lymphoblastoid cells', r 
) , 

phrase (b7, protein, [b7 ,'-',' 1 '] , ^B7-I',r). 

phrase (bcl , protein, [bclr ' - * , 2] , 'Bel -2 » , r> . 

phrase (c, protein, [c,'-',jun] , 'c:-Jiin*,r}- 

ph3rase(camk, protein^ [c&mk, iv] , ^CaMK IV', r). 

phrase (caap,' protein, [casp, ' - ' ,3] , 'caapase-3 * ,r] . 

phraae (caspase J protein, fcaspase, ' - ' , 3, f ami ly, protease] , ■caspase-3 

fsinily protease^r) - 
phrase (caspase.proteini [caapase, ^ - ' , 3, precursor] , 'caspase-i precur 
sor • , r ) - 

phrase {oaspase, protein, [caspase, • - ' . 2] , 'caspase-3' ,r) . 
phraae{caspaser protein r tcaspase, - , 3] , 'caspase- 3^ ,r) . 
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phsc^se (caspassr protein, [caspase, * - ' ( 'caspase-S ' /r) * 

phrase (caspassr protein. [caspase, ' - " , 7] , 'caapase-V* ,r) , 

phrase (catalytic, domairi, [catalytdcr domain?, •catalytic domain', 

r) . ■ 

phrase (cleavage J a ite, (cleavage . si tej , 'cleavage site' ,r) . 

phrase (cleavage, substance , [cleavage, products] , 'cleavage product©', 

r) - 

phrase (cooh, substaAce, [cooh, ' - ' , terminal , fragment] , • cooH-termina 
1 f ragrrient ' , r) . 

phrase I cr)^^ protein, [crJc, proteins) , ' crk proteins ' ^ rO . 

phrase (crkl. complex, [crkl, ' - ' , c3g, complex] , 'crkl-c3g complex' , r> . 

phrase (dcp, protein, [dcp, - ,13 4 *OCP-l '\ r) . 

phraseCdidr negation, (did, not], not, rj . 

phrase (ebvr species r ' Epstein-Bar r virus' rr) . 

phrase (epstein, species, [epsteinr ' - ' ,barr, virus] . ' Epstein-Barr vi 
rue ' ^ r) , 

phrase ( familial, disease, If amilial r alzheimer, ' ' ^ » , ©rdi^eaee} , ' f amil 
ial Alsheimer ' ' ' 's disease \r). 

phraee^gene, gene, [gene, encoding r inter leukin, « - ' ,^1 f/9^ne encodin 
g interl$ultiii-2 ' , r) . 

phrage<gst, protein, [g&t , ^ - ' , » £yn' , » - ^ ,eh2] r *GST-Pyn-SH2 ' ^ r> . 

phraae(gst, protein, [g&t, ^ - ' , ' fyn ' , ' - ^ , sh3] . 'GST-Fyn- SH3 ' , r} , 
phrase {gtp, complex, f^tp, exchange r of, rapl] GTP exchange of Rapi ' , 

i^J ■ 

phrase (guanidinar protein, C^u^i^idiner nucleotide, ' - ' rrelea9ing, fac 
tor,c3g] , 'guanidine nucleotide -releasing factor C3Cj^,r}. 
phrase (guanidine, STOalliiinolecule , [guani dine, nucleotide] > 'guanidine 
nucleotide \t) . 

phrase (guanosine. smallmolecule , l9uarto&ine, triphosphate] , 'guanosin 

e triphosphate ' r r) . 

phrase (guanos ine, smallrrolecule, [guanoaine/ diphosphate] , 'guanosine 
diphosphate' rr) . 

phrase (h4, cell, [h4r eel Inline ] ^ 'H4 cell line'.x), 

phrase {h4, cell, [h4 , human, neuroglioirta, eel la] , • H4 ,huTaarir neuroglioma 
r cells ' , r> . 

phiraBe<ha, protein, [ha/'-', '.[', delta, phpkfoj [Delta J PKPK 

B' rr> , 

phrase (hla, protein, [hla, ' - ' , dr7] , ' riLA-tiR7 ' , r) . 

phrase fi, protein, [i, '['.kappa, •]', b, '-','[', beta, ^ ^ 3 , ' I [lea 
ppaj B- [beta] ' , r) . 

phrase (i, protein, [i , '[', kappa, ' ) ' , b, ' - ' , ' L ' , alph^f ' ) ' 3 , 'I[kap 
pa] B- [alpha! ' r r) . 

phrase (i, protein, [i, '[', kappa, 'J'.bl, ' X (ItappelBSr J - 
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phra Be (ice /protein, lice. .ced. '-',3) r ' lCE/Cdd-3 ' , r) , 

phraae<il, gene, [il, « - • ^ 2 ,gene) . * gene encoding interleukin-2 \ r 

] . 

phraBe(iXr pscotain, lil,'-',2lf ' interleukin-2 ' , jr) . 

phraaedn, intercn, [in, the, case, of 3 f [] , ac) , 

phraee {in, state, Lin, the , anergic, state? . inactive, r) . 

phraee {inducible, cell, .[inducible, h4 , eel IJ . 'inducible H4 cell'^r 

) . 

phrase {interleukin, protein, [inter leukin, ' - ' , 2] , r) , 

phrase {interleukin, protein, tinterleukin, 3], ' interleukin- 3 

phrase (interleukin, protein, [interleukin, r.l r beta, converting, enzy 

me], *interleukin-l beta converting enzyme '.r}. 

phrase (juxkat, ceXI, [jurkatr celll , 'Jurkat cell', r) . 

phrase (jurkat, cell, [jurkat, cells), Murkat cell', r) . . 

phrase (kif 3a. protein, [kif 3a, ,3,bl , 'KIPSA/'iB* ,r) . 

phrasedbl, cell, [lbl,'-Sdrf. cellsl , ' LBL-0R7 cella • . r) . 

phrase (Ibl, cell, tlbl, • - * , dr7 , cellB] , 'LBL-DR7 cella' ,r) . 

phrase (Let, protein, [let , ' - ' , 23] ^ 'Let-23'r rj . 

phxaee (Tnay, probability, [may, be], poesible, rj . 

phrase (myc, protein, (itiy<^, ' - ' , p7QB€kd3el , ' Myc-p7096kD2B • , r) . 

phrase (myc, protein j {royCj ■ - ' , pdfci] , 'Myc-PDKl' , r} . 

phrase (my G, protein, I?tiyc, ' - ' ^p7Da6k] , ^Myi;?-p70e6k • ,r) . 

phraaa<TOyc, protein, iTmyD, * - » , p7&afike2S9d3e] , *Myc-p70B6kE289D3E* , r> 

phrase (myr, protein, [myr, ^- ' raktl , 'Hyr-Akt' ,x) . 

phrase (n, protein, [n, methyl aspartate, receptor], 'N 
MDAR ' , r) . 

phrase (n, protein, {n, ' , methyl, ^ - * ,d, ' - • .aspartate] , 'NMPA' } r 
phrase (nrative, cell, [native , h4 , cell] native H4 cell'^x), 
phrase Cnf, protein, [nf [' , kappa. ' ] ' rb] , 'NF- [kappa] B' . r) , 
phrase (nh2 site, [nh2 , ' - ^ , terminal] , 'NH2-tenninal ' , r) . 
phrase (nh2 , substance, [nh2 , ' - ' , ter?ninal , fragment] , ' nh2 - terminal fr 
agment * , r) , 

phrase <nih, cell, [nih, . 3. 1 3, fibroblasts J , 'Nlri-STS fibroblast©* 
. r), 

phrase (nih, Cell, [nih, '3t3', fibroblasts] . 'NIH-^TS fibroblasts' 
.r] . 

phraee (j^ot^I , substance, [normal , ntf ] , ' normal NTP* , r > . 

phraee (nuclear, protein, [nwcUax, factor, kappa, bj , 'WP'" [kappajB» 

. r)- 

phrase (piSQCjlued, protein, [pJSOGlued, - ,arpl] , '■plBOGlued'Arpi^ ,r) . 
phrase <phosphate,phogphorylate2, [phosphate, incorporated, into! , 
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phrase (phoaptiatidylinositol , small molecular [phosphatidyiinositoi , i 
<'*'/4,',\5,'-' , triphosphate) , 'phosphatidylinoaitol 1, 4, s-tripha 
aphate ' , r) . 

phrase (pho^phoino^i tide, protein, [phoephDirtg&itide, • - • , d«^pen«2ent , 

protein, kinaee) . ' PDKl ' , r) . 
phrase (phospholipaee, pratein, [phQ^phQlips^e, c, * - M] , 'pho&phol ip 
ase C-1' , ttJ - 

phr$^e(poly,protfiinr Ipoly, ' Cadp, '-' .ribose, ' ) polymerase] , "poly 
(ADP^riboa^J polyrrierase ' rr} . 

phrase ( polyvinyl id&r)&, structure; [polyvinyl idene. difluoride, raemb 
ranes) r ' polyvinyl i dene diCiuoride cneml^raneg \ . 
phrase (presenilin. prot^irir [piresenilin, i] , *preeenilin 1 ' , r> . 
phrase (presenil in, protein, tpresenilin, 2] , 'presenilin, 2 ' ,r> . 
phrase (produGtivelYr state, [productively, stiTn«lated] .active, r} - 
phrssfe (protein, prot&in, [protein, tyrosine, kinase] , 'protein tyroai 
ne kinase ' , r) . 

phrase (pirot^in^ protein, [protein, Itinase, c] , 'protein Icinase c, r> . 
phrase <ps2r substance, [ps2, '-' J ctf] , 'presenilin 2 COOH- terminal fra 
gment ' , r ) . 

phrase (pB2r3ub3tance J (p62,c'ifiAvage^ fragm^jitl , ^pxfiaenilin 2 cleava 
ge fragment' , r} . 

phrase (pvdf. etrucrture^ [pvdf , nrtemibranea] , ^'polyvinyl i dene diiluori 
de cnernbranes ' , i^) - 

phrase (rafr protein, Iraf,' -',!], 'Raf-1', i^^) . 

phrase (raf , protean, U^fr ' - ' , 1] , 'Ra£-i ' , r) - 

phrase (rapl . complex, [ rapl , ' - ' , gtpl t ' Rapl -GXP ',1:1. 

phrase {requirement r nefedS^ [ret^uire'meint, for] ^ nfeed(.r)- 

phraae(i3ftr, smalltnoleciile, tser, 1$], 'Ser 19*, r). 

phraa&{ser, stnallmolecule, [ser, 231, 'Ser 23', r). 

phrase {serine, aubstanccj Iserine, residues], 'serine residues', r 

) . 

phrase{src, domain, [src, homology, 2], 'Src homology 2'rr>. 
phrase {src, domain, [src, homology, 3], 'Src homology 2 ' , r} . 
phraae (srebp, protein, tsrebp, ' - ' , l] , ' sterol -regulatory element bin 

ding protein 1 ' , r} ." 

phrase (srebprprotein, [arebp, '-', 2] , ' sterol -regulatory element bin 
ding protein 2 ' . r) . 

phrase (sterol , protein , [sterol . ' - ' , regulatory , element , binding , prote 
in, n , ' eterol-iregulstory element binding protein 1 ' , r) . 
phiTqfeBe (sterol , protein, [sterol, , regulatory , element , binding, prot€ 
in, 2] , 'sterol -regulatory element binding protein 2 ' , r) . 
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phrase (t, cell, It, »-',dr7], 't-DRT'^r). 

phrase (t, cell. It. * - ' ,drt. • / ' , b7 ^ ' - • ^ 1] , 't-DR7/B7-l' ,x) , 
phrase{t, cell, It, cell], 'Tc&ll\r). 
phra^eltr cell. It, cells], 'rc^ll'.r). 

phir^&e(tr complex, [tr*-' , cell , receptor] , 'T-cell receptor' , r) . 
phrase (t, cell. Itj ■ - ' ,dr7, cells] , • t-DR7 cells' , r) . 
p]iraee(t,cell. [t, ' - ' ,dr7, •/ ' .b7. ' - ' , 1] , ' t-DR7/B7-l • , r) . 
phrase (tr complex, ft, ' ■ • ,cell,^intigen,receptorl , 'T-cell antigen rec 
eptor' . r} . 

phre3^ it hire Dili rte r aminoacid, [threonine, 229], 'threonine 229', r) 

phrase ( transcription r protein, I transcription r factor), 'transcript 
ion factor ' , r> , 

phrase (trypan, ^mallmolecule, 'trypan blue',r) . 

phrase (wt, protein, [wt, akt] , ^WTAkt'^r}. 

phrase (zap, protein, [sap. ' - ' , 70] , ^ SAP-70 ^ , r J , 

phrase (zdevd, smallmolecule, tzdevd, ^ - ' * fraJc] , ^ zDEVD-f mk* . r) . 

phrase (il , protein, lil , ' - , 3] , ' interleu]cin-3 ' , r) . 

wdeffab, compleXr antibody)- 

wdef (actin, protein, ^ctin) . 

wdef (activated, stater active) . 

wde£ (active, et^te, active) . 

wdef (ad, disease, 'Alzheimer' ' ' * s disease ■ ) - 

wdef (age, protein, 'AGO . 

wdef (akt, proteinj 'AKT'*) - 

wdef ( anergic, &tate, inactive) . 

wdef fanergic, ©tate, inactive} . 

wdef fanergy, state , inactive) . 

wdef (antibody, cotnpl'e^c, antibody) - 

wdef (Antigen r Gubatance, antigen)- 

wde f ( aop, protein , ' Aop ' ) . 

wdef (apoptoBie,procee&r apoptoeie) , 

wdef (bad, protein, 'BAD'}- 

wdef(c3gr protein, *C3G'>. 

wdef { ' ca2+ ' , sinall molecule, ' Ca2+^ ) . 

wdef {cas r protein, ' CaB ' ) , 

wdef (caspase, protein, caspase) . 

wdef (caspase, protein, caspase} , 

wdef{cblr protein, ^Chl'). 

wdef (ccrsrh, protein, 'CCRSrh^), 

wdef(cd26, protein, 'CD2a^). 

wdef ({^fells, structure^ cell). 

wdef tcfholesterol, 3mallirAolec5ule,chDleBte3:ol) . 
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wdef (cpp32,prc?tein, ' CPP32 ' } . 
wdef(crkl, protein, 'Cxkb'}, 

wdef (ctf, substance, ' COOH- terminal fragment'}. 

wdef (cytokine , smalltnolecule, tytokina) - 

wdef (cytosol , structure, cyto&<:>l ) - 

wdef (djnk, protein. ^d™k' ) . 

wdef\djun, pr'Ctein, 'DJun'K 

wde f {dy namit in^ protein , dynamitin) . 

wdefierk, protein, 'ERK'K 

wdef (eto,s?nallraolecule, "BTOM - 

wdef (etoposide, sTnallmolecLile, etoposide) . 

wdef (fad r disease. ' faTPiliai Alzheimer s disease' J. 

wdef(fyn, protein, ^F>=ti^J. 

wd^£(gdp, Bm^llmolecul&f ' GDP' } . 

wdef (gelflol in, protein jgelsolin) . 

wd^f {gpl2{), protein, 'gpliO'). 

wd^f (grb2 , pirx:?tein, ' Grb2 ' J . 

wdeC{get, protein, 'glutathione fi - transferase ') . 

wdef {gtp, smallmoleoule, ' GTP^ ) - 

wdef {hep7 0,pric:>tein, 'HSPVO') . 

wde f { h uman j ^pec i e e , human ) . 

v/defUkik, protein, 'IKK'). 

wdef (inactivated r state, inactive) . 

v/def (inactive, state, inactive) . 

wdef(jn>::, protein, 'JHK'). 

wdef(jnk, protein, 'JWK'). 

wdef(jnk2, protein, ' cJNK2'}. 

wdfef (kap3 , protein, kap3} . 

wdfef(kdaktr protein, ' KDAkt ' ) . 

wdef (kinase, protein, kinase). 

wdef ( kinectin , protein, kinectin) , 

wdef (klc, protein, klc) . 

wdef (laminr protein, laminj , 

wdef (nny OS ins , prat eirt, r^yoeina) . 

wdef (nmdar, protein, 'NMDAR' } . 

wdef (nTndar2b, protein, ^NWDAR^E'). 

wdef (ntf , substance, iieH2- terminal f ragnnent ^ ) . 

wdef (p70s6kr pirotein^ p70s6k) . 

wdef {p7$e6k, pxQt^inj p7Se$k) . 

wdef (parp, protein, ' poly (ADP-ribose) polyweraaft ' ) . 
wde f (pdkl , prote in, « PDKi ' ) . 
wdef (peptides, proteiUr peptide). 
wdef(pkb, protein, 'PKB'). 
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wdef (pkc^protein^ ^prctfein kinase C) . 

wdef (position r sice, site) . 

waef (positions. site, $itfi) . 

vfdef (protease , protein , pyoteaee) . 

wdef{pBi, protein, 'presenilin l') . 

wdef (p&2 , prote in . ' preseni 1 in 2 ' ) . 

wdef ( rapi , protein , • Rapl ' ) . 

wdef ( rae , protein , ' Ras ' ) - 

wdef (receptors, substance^ r&ceptor) . 

wdeftrela. protein, 'RelA'). 

wdef (reeidufis, substance, resitiue} . 

wdef (reaponsivft, state, active), ' ♦ 

wdef (06^ protein, 'S6'>. 

wdef {selectively, constraint , selective). 

wdef(serll2. aite, '56X112'). 

wdef(serl36. ^ite, 'Serl36'), 

wdei (ser32 , ©malltwolecule , ' Sex32 • ) . 

phrase (pBl> protein 

wdef (ser3S, emallitiolecule , *Ser36'} . 

phrase <psl, protein,- fpal , ' - * , ctf ] , . 'psl -ctf • r r) . 

wdef {sh2. domain, ^SK2 0 . 

wdef {sh3. domain, '9H3') . 

wdef {she. protein J ^ she ' ) - 

wdef { signa 1 eome y complex , signalsome} , 

wde£(eitee^ site, site). 

wdef (eoa , . protein, ' Sos ' ) , 

wdef (etaurosporine, STOallmolecule, ataurosporine) . 

wdef (eta , scnallmoleculer ' STS • J . 

wdef(tct, complex, 'T-cell receptoj ' ) . 

wdef (tetracycline, smallmolecule , tetracycline) . 

wdef'("th£^229raminDacid. "ThrSa?' ) , 

wdef < thrJOe , aminoaoid, ' Thr306 * > , 

wdef (thr3B9, aminoacid, 'Thri&g^ ^ , 

wdef (threonine, aminoacid/ threonine > . 

wdef (tyrosine, aminoaoid, tyrosine) - 

wdef {unresponsive, state , inactive} . 

wdef (unstimulated r atata, inactive). 

wdef (&vad,8TOallTO>lecule, 'zVAD') . 
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% lexsyn,pat 

% revised March 17, 2000

% SYNTACTIC LEXICON FOR ACTIONS

% Contains syntactic entries for action type words and phrases

%

% synp(+Word1, +Wordlist, +Syn)

% synp: Word1 is first word of phrase, Wordlist is list of words in phrase

% synp: Syn is syntactic category

%

% synw(+Word, +Syn) is same as synp except there is no wordlist

synp (account , [account r for] ,v} . 

synp (account , [account , f or] . vp) , 

eynp (accounted^ [^cco^^nted^ £pr] , ved) , 

eynp (accounted, [accounted, for] , ven) . 

eynp (accounting, {accounting, fori ,ving) . 

eynp (accounting, [accounting, for) ,n> . 

eynp (accounts r [accounts, for) ,vp) . 

aynptadd, [add, up] ,vpj - 

aynptadd, [add, up] , 

synp (added, [added, up)oVed}. 

synp (added, (added, up} , ven} , 

eynp (adding, (adding, up].n}. 

s>Tip (adding, (adding, upj rVing> * 

synp<adds, [adds^ up] .vp] . 

synp{aTTir [Am, a , means , of, producing] ^ vp) . 

synp (am, [^m, due , to] , vp) . 

synp (are. Care, a ^ means, of , producing] ,vp) , 

synp (are. [are, due. to] rvp) . 

synptae, [aa ^ a, result rof] rprep) . 

synp (attributable, [attributable, to] . vp> , fe ? 

synp (attributed, [attributed, to] r^en) - 

synp (based, lbAs^d,on] ,ven} . 

eynp (based, [b&fiedjUpon] , van) . 

eynp(be, [be. a,Tiiftan^, of r producino;} , v> . 

eynp ( be , [be. du* , to] , v ) . 

&ynp (because, [bftcauae , of] , pr«*p} . 

eynp<been, [baen, ajm^^tne. of , producing] , ven) . 

&ynp(been, (teisnj due, to) t^^ti^ • 

»ynp (being, [being, a, means, of , prcsdueingl , n) , 

^ynpCbeingr (being, a, means, of , produoingl ^ving] . 
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©ynp (being, [being ^ due , to? ,n) . 
synp(being, [being, due, to], ving).
synp(caused, [caused, by], ved).
synp(caused, [caused, by], ven).
synp(convey, [convey, a, signal], v).
synp(convey, [convey, a, signal], vp).
synp(conveyed, [conveyed, a, signal], ved).
synp(conveyed, [conveyed, a, signal], ven).
synp(conveying, [conveying, a, signal], ving).
synp(conveying, [conveying, a, signal], n).
synp(conveys, [conveys, a, signal], vp).
synp(dissociate, [dissociate, from], vp).
synp(dissociate, [dissociate, from], v).
synp(dissociated, [dissociated, from], ved).
synp(dissociated, [dissociated, from], ven).
synp(dissociates, [dissociates, from], vp).
synp(dissociating, [dissociating, from], n).
synp(dissociating, [dissociating, from], ving).
synp(dissociation, [dissociation, from], n).
synp(down, [down, '-', regulate], v).

synp(down, [down, '-', regulate], vp).   % A down-regulates B   A --> B

synp(down, [down, '-', regulated], ved).
synp(down, [down, '-', regulated], ven).
synp(down, [down, '-', regulates], vp).
synp(down, [down, '-', regulating], n).
synp(down, [down, '-', regulating], ving).
synp(down, [down, '-', regulation], n).
synp(due, [due, to, the, fact, that], adj).
synp(due, [due, to], adj).   % ?
synp(form, [form, complex], v).
synp(form, [form, complex], vp).
synp(formation, [formation, of, complex], n).
synp(formed, [formed, complex], ved).
synp(formed, [formed, complex], ven).
synp(forming, [forming, complex], n).
synp(forming, [forming, complex], ving).
synp(forms, [forms, complex], vp).
synp(had, [had, an, active, role, in], ved).
synp(had, [had, an, active, role, in], ven).
synp(has, [has, an, active, role, in], vp).
synp(have, [have, an, active, role, in], v).
synp(have, [have, an, active, role, in], vp).
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aynp(havingr {having, «n, active, rol«. inl , n) . 

g>^-ip(havin9r [having r an, active, role . in] iVing) . 
eynp(i3^ [ie , a,means, of , producing] , vp) . 
eynpiia, lis, due, to) ,vp) . 

synp< functions. [ f unctions, as r a, negative, regulator, of 1 .vp) , 

synp (function, {function, as, a, negative, regulator, of] ,vp) . 

synpdeaiS, [lead, to) , v) . 

syrtpdfiads^ [lea<3e , co5 , vp) . 

43y tip (leading, [leading, to] , n) . 

igy ftp (leading, [leading, to] ,ving >. 

eynpCleads, IleatSe, tol ,vp ), 

g/np(l€d, [led, to] ,vei3) . 

aynpded, [led, to] ,ven) . 

$ynp(mayr I may , be, responsible, for] ,vp) . 

eynpCmediate, [mediate, BignAl), v) , %A mediates a signal to 
B 

aynp<Tnedi3te, [mediate, a, signal], vp> . 
synp (mediated, [mediated, a, signal], ved) . 
synp (mediated, [mediated r a, signal], ven) . 
Bynp (mediates, [mediates, a, signal], vp) , 
Bynp (mediating, [mediating, a, signal] /■ n) . 
Bynp (mediating, [mediating, a, signal], ving) . 
eynp! mediation, [mediation, of , a, eignAl},n). 



np ( n , 


[n, 


• - S acetylate) , v) . 


synp (n, 


[n. 


' .acetylate) , vp> , 


synp <n. 


[n, 


' - \ acetylatedl , ved) . 


eynp (n. 


[n, 


^ - ' . acetylated] , ven) , 


Bynp(n, 


[nr 


' - ^acetylstee] , 


eynp fn. 


[H/ 


' - * , acetylating] , n) , 


synp(n, 


[n. 


' - ' .acetylsting) ,ving) . 


synpvn, 


[n, 


' - \ acetylationl , n> . 


synp(n, 


[n. 


^ - ' , acyl^te] , v) . 


synp<n, 


[n, 


^ - ' ,$cylAte] ,vp) . 


fiynp{n. 


(n. 


' - ' , Acyiated) , ved) . 


synp (n. 


In* 


' - ' , Acylatedl , ven> . 


synp (n. 


[n. 


' - ' , Acylatftsl , vp) . 


synp (n. 


[n, 


' - ' J acylating] , n) . 


s^-np (n. 


[n, 


« - ' , acylatingl , ving) . 


synp in, 


{n, 


• - ' J acylation] ,n) . 


synpfn. 


(n, 


' - ' , glycosylate} , v> . 


synp(n, 


[n. 


• - ' ,9ly{^t>sylat6] ^vp) . 


synp(n, 


[n. 


' - ' fOjlycrosylatedl , ved) . 


synp(n. 


(n. 


» - ^ ,9lyt66ylatft6] , ven> . 
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©ynp 
synp 
synp 
synp 
synp 
synp 
synp 
synp 
synp 
eynp 
Bynp 
eynp 
synp 
synp 
synp 
aynp 
synp 
synp 
eynp 
eynp 
synp 
synp 
synp 
synp 
synp 
synp 
synp 
synp 
©ynp 
synp 
©ynp 
synp 
synp 
synp 
^ynp 
aynp 
synp 
synp 
synp 
synp 
synp 
synp 
aynp 



n. In, 
n, [n. 

(n, 
n, [n, 
n. (n, 
o, lo, 

O, [Of 

<i| [Or 

Oi lo, 
[o, lo. 

Of [Or 

o, [q, 
only f 
[prolyl, 
prolyl, 
prolyl , 
prolyl , 
ipxolyl r 
prolyl t 
prolyl , 
prolyl , 
Teauit r 
reBult , 



glycoeylatee] ,vp) . 
glytoeylacing] ^n} , 
glycosylating) ,ving) . 
glycosylationj ,n} . 
terminal rprot^lysial ,n) . 
glycosylate!] ,v> . 
glycosylate] ,vp) . 
glycosylated} , ved) . 
glyc^oaylatedt] .ven) . 
glycoaylatesl ,vp) - 
glycosylating] rO) . 
glycoaylatingl rVing) . 
glycoaylation) ,nj . 
(only, after] ,prep} 



[prolyl, '-\4r ,hyd2ro>tylate] ,v >, 
prolyl , • - ' r 4 r ' - ' , hydroxylat*] , vp) . 
[prolyl, » - • , 4 , ^ - S hydroxy lated] , ved ) . 
prolyl , ' - ' , 4 , ' - ^ , hydroxylated] , ven ) , 
[prolyl, ,4, ^ - Shydxo?^ylatee] ,vp> , 
[prolyl, ,4, •-Shydrc^xylatingl ,n } . 
prolyl , • - • , 4 , ' - ^ hydrcjtylating] , ving ) 
prolyl ; ' - ' r 4 , ' - s hydioxylation] , n) . * 
result. from] , 
reault, f irom] , vp) . 
result, [result, inl ,v) . 
[result, [result, in] ,vp} . 
[resulted, [ resulted r from] , ved) . 
resulted, [reeultedf from] , ven) , 
[reeultedr [reaulted, in] ,ved) . 
[ resulted, [result frdr in] , ven) . 
[reeultins, (resulting, frorn] . n) . 
reeultins, [resuiclng, f rom} .ving) , 
rewlting, [resulting, in] ,n} . 
resulting, [rfeaulting, ii^J ,ving) . 
reeulta, [resulta, from] ,vp) . 
results, [results, in] .vp) . 
set, £s€t, free] rvj . 
set. ieeX-p free] ^v) . 
sec, [eet, free] ,ved) . 
set, [set. free] rVed) . 
set, [set. free] rven) , 
set, [set. free] .ven) . 
set, [set, free] ,vp} - 
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aynptsetB. [aetSf fre6i],vp>. 
aynptsets, [sets, freel.vp). 
gynp (eetcing^ [setting, freel.n), 
Bynp (setting, (t^etting, free] ,n) . 
synp {setting, [netting, free] ^vin^) . 
synp (setting, l&etting, f reej , ving} . 
synp(jsuppress, [suppress, activity, of]^,v) . 
Simp {suppress, [suppress, activity, of ]\vp) - 
3ynp<euppresse;3, [guppreBsec3, activity, of),ved). 
synplauppresB^d, [suppr&aaed, activity, afj.veti}. 
feyr>p{fiuppreB2e&, [Buppresses, activity, of] rvp) , 
siTip(auppreseing, (supprasfling^ activity^ of] r 5^5^ 
synp (suppressing f [b oppressing, activity, of ) .ving) . 
synp < suppression r teuppression, of , activity, oi],n). 
synp {switch, [switch, on. the, activity, of],vp). 
sj'Tip (switched, (switched, on. the, activity, ofl , ved) . 
synp (switched, (switched, on. the, activity, ofj.ved). 
e>'Tip (switched, [switched, on, the^ activity, of] ,ved) . 
B>Tip (^witched, tswitched, onj the, activity, of).ved). 
©i^npf^witched, [switched, on, the, ^ctivity^ oij^ved). 
B>TXp(&witchefi, [s\^itchea, on, tha, ^ctivity^ of),vp), 
eynpfup, [ijp, ' - ' ^ regulatQ] , v) . % A up -regulates B B A 
e>Tip(up, lup, regulate] ,vp} . % A yp-r^gvlAtee B 6 A 
Bynp (up, fup, • - ' , regulatetS] , v$d) . 

aynp<up, tup, \ regulated} rven> . % A up-regulates B B A 
s>Tip (up. [up, ' - S regulates} . vp) . 

s>Tip<up, fup, ' - ' /regylatingl .n) . % A up-regulatee B B A 

eynp(up, fup, regulating) ^ving) > % A up-reguiatee B B A 

synpiup, tup, " - ^ .regulation] ,n) . 

^ynp(was, [was. a, means, of , producing ] ,v&d) . 

eynpfwaS/ (was. due, to] , ved) . 

eynp(weare, [were , a , means ^ of ^ producing] , ved) , % ? 

$ynp(wer*^ [were, due, tol , vedj . 

&ynwfac«tylate^ v} . 

synw(acfetylate, vp) , 

synv; (acetyl at ed,ved) . 

synv.' ( acetyl at ftd, v6n) . 

synw(acetyl5ites, vp) . 

eynw(acetylating,n) . 

synw(acetylating, ving) . 

synw (acetyl ation,n J , 

synw (activate, v) . 
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synw (activate ^ vp? . 
synw (activated, ved) - 
syTW<activ^ted, ven) . 
synv/(activ«tes, vp) , 
synv{activating,n) . 
syaw{ac:tivating, vifig) , 

5iynw(add, vp) . 
Byrm^ (sdded, ved) . 
synw f ©d^^ed ^ ven) . 
synvf (adding, n) . 
synw ( adding, viag) . 
eynw (addition, n} , 
Bynw(adds, vp} , 
synw (after. prep) . 
synw { aggregate . v> . 
^ynw ( aggregate , vp) , 
synw (aggregated , ved) , 
synw ( agg rf ^ga ted , ven ) . 
syr^w{ag9r«gat€s, vp) . 
synw (aggregating ,n) . 
synw (aggregating , ving) . 
synw (aggregation ^n) . 
synw (arrest. n} , 
synw (arrest. v) . 
gynw (arreat, vp) . 
aynv (arrest edj ved) . 
ayiiw ( arrested J ven) . 
synw (arresting, n) . 
synw (-a rr e s t i ng , vi ng ) . 
synw(arresCB, vp) . 
synw (associate, v) . 
synw (associate rvp) . 
synw (associated. ved) . 
synw (aeeociated, ven) . 
synw (aeeocia ted, vp) . 
synw (associating, n) . 
aynw (asaociiting, vin9> . 
synw (association, nj . 
synw (attach , v) . 
aynw(attach,vp) » 
aynw (attached ,v$d) , 
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synw (attached .ven) . 
synw < attaches , vp) , 
synw < attaching rd) , 
ay nw (attaching ,ving) , 
synw (attachment , n> , 

ByDw{bindt, vp) . 
synv; (binding f nJ . 
synw (binding rVing) . 
synw (binds, vp) - 
synw (block, v) . 
synw (block, vp) . 
synv (blockage, n) . 
synw (blocked, ved) . 
synw (blocked, ven) . 
synw (blceking , n> , 
synw (blocking, ving) . 
synw {blocks ; vp) - 
synw< bound rVed) . 
Bynw (bound, ven} , 
eynv(bir«^k, v) . 
aynu»(breakj vp) . 
synw (breakage, n} . 
synw (breaking, n) . 
synw (breaking rving} ^ 
»y7iw (breaks, vp) , 
synw (broke , ved) . 
&ynw (broken, vAti) . 
&ynvi (ccitalyz^tion, n) . 
aynw (catAlya* , V) . 
aynw{cat&lyze, vp) , 
aynw (catalyzed rVed) . 
synw (catalyzed rven) . 
synw (catalyzes rYP) - 
synw{catalyzing, n) , 
synw {catalyzing, ving) , 
synv/{cauaationrn) , 
synv (cause rii) . 
eynw (cause, v) . 
eynw (cause ,ven) . 
aynv;(cau$€ i vp) . 
synw ( caused, ved) - 
dynw( causes, vp) . 
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aynw(caviair»9, ving) . 

synw(cle&vage,n) . 

synw(cleave, V) , 

synw (cleave, vp> . 

aynw { cleaved, v^d) . 

sypw{cleaved, ven) . 

synw f cleaves, vp) . 

synv/ {cleaving, n) , 

eynw (cleaving^ ving) . 

eynw(coit«munoprecipitate ,v) , 

ayiiw<cc?iinmunopxecipitate, vp) , 

synw (coimmuni^precipitated , ved) . 

synvj ( coimmunoprGcipi t Qted , ven) . 

synwCcoimmunoprecApitQteBrVp) , 

aynw(coirnmunoj>recdpitating ,n>, 

aynw(coiJnmunoprecipitating ^ving) - 

synw f coijnrnunoprecipit ation , n> , 

symi combi na t ion , n ) - 

synv (combine ,v> . 

eynvj i oombi ne , vp ) . 

synw (combined , ved> . 

synw (cocnfoined ,veA} - 

eynw (combines, vp) , 

synw {combining ^n) . 

eynw (combining ^ ving) . 

synw (conjugate , vK 

gynw (conjugate , vp} , 

synw (conjugated *ve) . 

eynw (conjugated , ved) , 

symv(conjugate6,vp) . 

eymv (conjugating ,n). 

»ynw (conjugating rVing) . 

eym; (conjugation ,n) . 

Bynw (connect ,vp) - 

synw (connect fV) . 

synvc (connected ,ve) 

synw (connected ,ved). 

synw ( connect ing , n) . 

eynw ( connect ing , ving) . 

ay nw ( connec t ion , n > . 

synw (connects, vp) . 

synw (constrain, V J , 
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eynw (constrained, ved) , 
synw(constrained, ven).
synw(constraining, n).
synw(constraining, ving).
synw(constrains, vp).

synw(coprecipitate, v).
synw(coprecipitate, vp).
synw(coprecipitated, ved).
synw(coprecipitated, ven).
synw(coprecipitates, vp).
synw(coprecipitating, n).
synw(coprecipitating, ving).
synw(coprecipitation, n).
synw(copurification, n).
synw(copurified, ved).
synw(copurified, ven).
synw(copurifies, vp).
synw(copurify, vp).
synw(copurify, v).
synw(copurifying, n).
synw(copurifying, ving).
synw(couple, vp).
synw(couple, v).
synw(coupled, ved).
synw(coupled, ven).
synw(couples, vp).
synw(coupling, n).
synw(coupling, ving).
feynw (ciit , n) - 
synw(cut,v) . 
synw(cuCr ved) , 
synw (cut , venj - 
synw(GutrVp] . 
eynw (cuta, vp) . 
gynw (cutting, n) . 
synw (cutting / ving) . 
synw (deactivate, V} . 
synw ( deactivate, vp J . 
synw (deactivated* ved) . 
synw (deact iv^t«d , ven) , 
synw (deactivates, vp) . 
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eynw (deactivating, ving) . 
synw(deactivation, n).
synw(death, n).
synw(demethylate, v).
synw(demethylate, vp).
synw(demethylated, ved).
synw(demethylated, ven).
synw(demethylates, vp).
synw(demethylating, n).
synw(demethylating, ving).
synw(demethylation, n).
synw(dephosphorylate, v).
synw(dephosphorylate, vp).
synw(dephosphorylated, ved).
synw(dephosphorylated, ven).
synw(dephosphorylates, vp).
synw(dephosphorylating, n).
synw(dephosphorylating, ving).
synw(dephosphorylation, n).
synw(die, v).
synw(die, vp).
synw(died, ved).
synw(died, ven).
synw(dies, vp).
synw(disassemble, v).
synw(disassemble, vp).
synw(disassembled, ved).
synw(disassembled, ven).
synw(disassembles, vp).
synw(disassembling, n).
synw(disassembling, ving).
synw(disassembly, n).
synw(discharge, n).
synw(discharge, v).
synw(discharge, vp).
synw(discharged, ved).
synw(discharged, ven).
synw(discharges, vp).
synw(discharging, n).
synw(discharging, ving).
synw(disengage, v).
synw(disengage, vp).
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synw{diseng&9eTnent.n] . 
synw(disengages, vp).
synw(disengaging, n).
synw(disengaging, ving).
synw(divide, v).
synw(divide, vp).
synw(divided, ved).
synw(divided, ven).
synw(divides, vp).
synw(dividing, n).
synw(dividing, ving).
synw(division, n).
synw(dying, n).
synw(dying, ving).
synw(enhance, v).
synw(enhance, vp).
synw(enhanced, ved).
synw(enhanced, ven).
synw(enhancement, n).
synw(enhances, vp).
synw(enhancing, n).
synw(enhancing, ving).

gynw (express, vp) . 
synw C expressed , ved) . 
synw ( expressed , ved) . 
synw ( expressed rven} - 
aynw (expresses rvp) - 
aynw (,expressing , n > . 
synw (expressing, n) . 
synw (expressing, ving) . 
aynw (expx-^ddion, n) . 
aynw (generate, v> . 
synw (generate, vp) . 
aynw (gemeratedr ved) . 
aynw ( ge ne rated , ven ) . 
aynw (generates , vp) . 
synw (generating, n) . 
aynw ( ge ne rat ing,vin9) , 
synw (g^berat Ion , n) . 
eynw(heiiJ,v) . 
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synw(hew«d, ved) . 
synw (hewed, ven) . 
synw tbewing, n> . 
synw (hewing r ving) . 
aynw thews, vp) . 
flynw (hinder, V) . 
aynw (hinder, vp> . 
synw (hi nt3« re <^rVe*d) . 
aynw ( hindered rV(?n) . 
fiynw (hindering r^) 
synw (hinderin9,viA9J.. 
synw (hinders , vp) . 
synw (hindrance < nj , 
synw(inactivate, v).
synw(inactivate, vp).
synw(inactivated, ved).
synw(inactivated, ven).
synw(inactivates, vp).
synw(inactivating, n).
synw(inactivating, ving).
synw(inactivation, n).
synw(incite, v).
synw(incite, vp).
synw(incited, ved).
synw(incited, ven).
synw(incitement, n).
synw(incites, vp).
synw(inciting, n).
synw(inciting, ving).
synw(induce, v).
synw(induce, vp).
synw(induced, ved).
synw(induced, ven).
synw(induces, vp).
synw(inducing, n).
synw(inducing, ving).
synw(induction, n).
synw(influence, n).
synw(influence, v).
synw(influence, vp).
synw(influenced, ved).
synw(influenced, ven).
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synw (influences rvp). - 

synw(inf luen<=?tng, n) - 

synw (influencing, viy>g) . % ? 

fiynw{ inhibit, v) . 

synwt inhibit, vp) . 

aynwi inhibited, ved> . 

eynw (inhibiting, n) - 
aynwCirthibiting, ving) . 
aynwanhibiticn^n) . 
synw (inhibit SfVp) * 
synw(initiAt«,v} . 
synw (initiate, vp) . 
synv; (initiated, ved) . 
synw (initiated, ven) . 
synw (initiates rvp) . 
synw (initiating, n) . 
eynw(initiating,ving) . 

aynw (initiati<3n,vp) . 
eynwdnstigatfi; v) 
synw {instigate, vp) . 
syn"w{insci9atedrved) . 
synw( instigated, van) . 
synw (instigates, vp> . 
synw (instigating ^n} . 
eynw (iuetigafcing.ving) . 
synwdnatiSfAtiDn/n) . 
synw(interact rV> . 
synw (interact . vp) . 
synwtinteractedrvad) . 
synvCinteractedrVen) . 
synw ( interact ing , n) . 
synw ( interact ing,ving] . 
synw (interaction, n) . 
aynw (interactions .n) . 
eynw (interact e,vp) , 

eynv; (j coined, v$ci) , 

eyrtw < joined, ven> . 
eynitf (joining, n) . 
eynvr (joining, ving J . 
eynti* ( joirte fVp) - 
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synw {juncture, n) . 
synw (liberate, v) - 
synw (liberate, vp) . 
©ynt^ (liberated, vfid} . 
eynw(lib&ratedrven> . 
isynw (li berates, vp) . 
synw (liberating, n) . 
eynwdibeirating, ving) . 
^yn-w (liberation, n) - 
«ynw (limit , v) . 
synw (limit J vp) , 
synwdimitatio?^, n) . 
synvdimited, ved} . 
eynw (limit sd, ven) . 
synw (liit^itingr n) , 
synwClimiting, ving) . 
synw (limits, vp) . 
synw(linJc,n> . 
• ey^T*' (linkr v) . 
synw(linkfvp) . 
©ynw (linked ^ ved) .. 
syr»w ( 1 inked , ven } - 
gynw ( 1 inking, n) . 
gynw ( 1 inking , ving) , 
sy nw ( 1 inks , vp ) - 
synw (mediate - 
synw (inediate , v^) . 
synw (mediated^ ved} . 
syriv? (tn&diated, ven} . 
synw (mediates, vp) . 
syyiwdmediatingrn) , 
synw (mediating rving} . 
synw (mediation, n) . 
synw (THfith yl ate, vp) . 
synwtmettiylate, V }. 
synw (methylated, ved )- 
synw (me thy 1 at ftd, ven ). 
synw fmethyl^tes , vp) . 
synw(TTvethylating^n ) . 
aynw (methylatingj ving 
8ynw(methylation, n) . 
eynw (modif ication, n) . 
eynwlniodif ied, ved) . 
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synw (modified, ven) . 
synw(?nodifies,vp) . 
synw (modify. . 
^ynitf (raodlf y c vp) . 
&ynw (modify ing, nj . 
©ynw (modifying rVing) . 

eyrtw (mutate, vp 3 . 
fcynw (tnutAted. ved) - 
synw (mutated, ven) . 
synw (mutates, vp) . 
synw{imjtatingrn) I 
synw{mutating, vincj) - 
synw {mutation, n} . 
synw {overexpress , vK 
synw{ovexexpreBS . vp) . 
fiynw(overexpreBsed,ved) . 
synw (overexpreesed , ven) . 
Bynw (overs ^ipx'e as ea ,-vp] . 

$ynw{overexpreasing, ving} . 

aynv <p.air ,vp] . 
synw (paired, ved) . 
synw (pa i red r ven ) . 

synw (pairing, ving) . 
synw ( pa i rs , ) . 
synw ( pho ephory lateen) . 
synw (phosphoryla te , vp) . 
synw (phoBphoryla ted, ved) . 
synwtphoephorylatedjven) . 
synw(phosphorylates,vp) . 
synw (pho spho ry 1 a t i ng r n ) . 
3ynw(phoBphorylating,ving> . 
synw (phoephorylat ion, n) . 
synw (prompt e,v> . 
fiynw(promotie,^/p} . 
synw (promoted, ved) . 
fiynw (promoted r van) . 
synw(proiflotea, vp) . 
3ynw(procnQtin$r n) . 



Page 15 



<Wa_0063687A1TL> 



wo 00/M61I7 



synw (promoting, ving) . 
synw < promo tion,n) . 
synw {prompt , n ) . 
synv/(prompt, v) . 
6ynw(pxompt, vp) , 
synw (prompted . ved) . 
synw (prompted , venj , 
synw(proT7i(pting,n) . 
synw (proTTipting, vingj . 
synw (proTTTipts , vp} . 
synw (react, v) . 
gyrtw f react, vp) - 
Bynw< reacted rVfed) . 
synw {reacted, vfen) • 
&yftw freaotingr . 
eyrtw{reactingr'wiiig} . 
synw (react ion , n) - 
synw (react©. vp) . 
synw (regulste, V) , 
synw ( regulate, vp} . 
synw (regulated, ved) , 
synw (regulated rven) , 
synw ( regulates rvp) . 
synw (regulating, n) . 
synw (regulating, VI ng) . 
Bynw (regulation, n} . 
synw (rel ease y n) 
synv(relea$$ r V) . 
synw (release , vp) , 
»ynw {released, ved) , 
synv^ Trel eased, ven) . 
synw (releases rvp) . 
synw (releasing, n) , 
eynw (releasing, ving) . 
eynw ( removal ,n) , 
aynw( remove rv) . 
synw(remove,"vp} , 
synw (removed J ved) . 
synw{renicru-edi ven) . 
synwfrewc'veB^vp) . 
synw ( removing , n> . 
eynw (removing , virtg) . 
synw ( replace . 
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synw ( replace , vp) 
aynw t replaced rved) . 
gytiwi replaced rven) . 
&yT^w (replacement , n) . 
eynw (replaced vp) . 
synw (replacing, n) , 
synv (replSLCing, ving) . 
synw<repr&ea, vp) . 
synw{repie9aj v) . 
synw{repreeaed. ved) . 
synw {xepi'Sflsed , ven) . 
synw{rftpressee r yp) ■ 
synw{repressin9#n> - 
synvr {repressing, vlng) , 
aynw{repreBsiori, n) . 
aynw {requi r e , v ) . 
3ynwr(requirei vp> . 
gynw (requi red/ vetS ) - 
synw ( requi red^ ven) . 
eynw (requirement . n} . 
eynw (requires, vp) . 
eyrvw (requiring. n) . 
aynwtreijui ring, ving) , 
aynwl restrain, vp) , 
flynw( res train, v) , 
synw (restiainetS^ved) . 
synw(reetxaine6,vfrn) . 
aynw( restraining rH) . 
eynwt restraining r ving) . 
eyftw ( restrains r vp} . 
synw i restraint r n) , 
synwtsensitiaatiOTr n) , 
synw (sensitise f vp) . 
synw (sensitise, vj . 
synw(3enfliti2ed, ved) . 
synw<sensitisftd, ven) 
synw{eBnsitisftfl^ vp) . 
synw(Benj9iti2in9,n) . 
Bynw(een«itisin9, ving> . 
Bynw (separate, v3 . 
0yT3w(eeparat«rVp) - 
eynw ( Sf^paratfrd, ved) . 
B/nw (separated, v^n) . 
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6ynw{aeparating, n) . 
aynw (separating, ving) . 
laynw (separation^ n) . 
sysw (sever, v} . 
Bynw (eev^r , vp) . 
synw (severance, . 
synw i severed , ved) . 
synw (severed rVen) . 
synw (severing ill) . 
synw (severing, ving) . 
synwfeeverg^vp) , 
synw (sicjnal ,v) . 
synw (signal rVp) . 
synw I signaled/ ved) . 
synw ( signaled, ved) . 
synw (signaled, ven) . 
synw (signaling, rt) . 
synw (signal ing,vdng) . 
synw ( s igna 1 b r vp ) * 
synw (split , n] . 
$ynw (split, v) . 
synw ( spi i t , ved J . 
symtf ( spl i t , ven) . 
synw (split., vp) . 
synw ( s pli t B r vp) . • 
synw (split tip^, nj . 
symv (splitting, ving) . 
3ynw ( et Imul^te , v) . 
&ynw ( et imulate , vp ) - 
eyriw (stimulated, ved) . 
synw (stimulated, ven) . 
&ynw(stimulates,vp} , 
syn^v (stimulating, n} , 
synw (stimulating, ving) , 
synw < stimulation, n> , 
synw (substitute , v) . 
synw {substitute, vp J . 
synw ( substituted, ved) . 
eynwleubatituced,ven) . 
gynw(eubatituces, vp) . 
eyTiw{eubstitiJCing, n) . 
synw {substituting, ving) . 
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6ynyr(substitution,n) . 
synw { suppress . vp) , 
aynw [ suppress . V) . 
synwC suppressed, ved) . 
synw(suppreeeed,ven) . 
nw ( suppr e Bees, vp J , 

3ynw(5uppre£3it^9i ving? . 
synw (suppression, n 5, 

3ynwCtie,v) . 
&ynw(tie.vp) . 
synwttied. ved) . 
synw(taed, ven) . 
synw (tiee. vp) . 
synw (transcribe, V) . 
synw (transcribe. vp) . 
synw (tr*nscribe<i, ved) . 
aynw (transeribecij ven) . 
synw (trAJtiseribeB, vp) , 
©ynv < transcribing r n> - 
eynwi|tran3cribin'3, ving) . 
synw (t;r^nscript ion. n) . 
synw (tying, n) . 
synw (tying, ving} - 
9ynw<obiquitiniaatiOT,n) 
synwfYJbiquiciniae, v) . 
©ynw{ubiquitiniserVp) . 
aynw<ubi<iuitini5ed, ved) . 
aynw<ubiquitinised, ven) . 
synw(ubiquiti5nizM^ vp) . 
synv;{ubiquitini2ing, n) . 
eynw{ubiquitinisingp ving} . 
6ynvx(utgft,n) . 
Bynw(urgft,v) . 
B>Tiv/(urg«, vp) . 
synv/ (uxg$d,ved) , 
syn-w (urged, v&n) . 
Byn\>f (urges, vp) , 
B>Triw (urging rn> . 
synw (urging rving) , 

% the following are verbs connected with complexes 

synw if arm, v) - . 
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synw (form, vp) . 
synw( forms, vp) . 
©ynw ( formed , ved) . 
synw ( formed rVen) . 
synw ( forming r rt) . 
synw ( f orymation, n) . 
eynw($fi6^mble, v) . 
eiynwt^si&emble, vp) . 
6yr>w < assembles. vp) . 
eyrvw(^£&embled,ved} . 

synw (assembly, . 
synw{dissas3ecubler v) . 
synw{dissas3ecntale', vp) . 
synw{dissas3ernbleB, vp) . 
synw{disBae3enibled^ vb6> . 
synw (disBassembl^, vert) . 
synw{disBaseemhling, n) . 
9ynw{dia©as&embly,n) . 
synw {dissociate rV) . 
aynw (dissociate, vp> . 
flynwt dissociates, vp) . 
synw (diBBOciafcedr ved) . 
eynw (di BBOciated, ven) . 
aynw (dissociating, n) . 
aynw(difiaociation,n) . 
aynw (recruit , v) . 
aynw (recruit , vp) . 
synw (recruits ,vp) . 
synw (recruited, ved) . 
synw-.{recruited, ven) , 
©ynw {recruit ingyn} . 
synw{recruittn^iit,n) - 
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% lexsemact ,pat 

% revised March 17, 2000

% SEMANTIC LEXICON OF ACTIONS

%%%%%%%

% For genomics - the grammar tests for semantic and syntactic categories

% separately for action type of categories; for substances the lex

% entries are the same as in the medical area

% action type phrases have two entries: a semantic entry and a syntactic entry

% This lexicon contains the semantic entries for words and phrases

% semp is a lexical entry for phrasal lexicon

% semp(+Word1, +Sem, +Wordlist, +Targetform, +Features)

% semp specifies a semantic lexical definition for the genomics literature

% semp is equivalent to the predicate "phrase" in the medical area
% semp: Word1 is first word of phrase, Sem is semantic category
% semp: Wordlist is list of words in phrase, Targetform is output form

% semp: Features is a list of 2 elements or the atom "def" representing default

% semp: Features 1st element is rev or nrev meaning reversed or not reversed

% semp: Features 2nd element is a # specifying number of arguments for action

% semp: Features = def is equivalent to a list = [nrev, 2]
% in case action has 1 argument, use [1, _]

% semw is a lexical entry for single word
% semw(+Word, +Sem, +Targetform, +Features)
% semw: the arguments are the same as for semp except there is no Wordlist

%%%%%%%% 

multifile(semp/5).
multifile(semw/4).

semp (account , cause, (account , for] , cause, [def 1 } . 
Bemp (accounted, cause J [accounted, fori i cause, [defl > . 
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semp (accounting, cause, (accounting, for] , cause, [def 1 ) . 

semp (accounts. cause / laccQunfcs, for] .c^uae^ [def] ) . 

semp(ad)6, attach, [add, upl , attach, IiSfifl). 

6^rtip(added, attach, [added, up], attach, (def]), 

semp(adde, attach, tadds, up], attach, {def]). 

aemptare, c&use, [are, a^meane rOf , producing) , cause, [def] ) . 

aemp (are, cause, [ar&,due, to] ^cau^e, [2,xbv1) . 

semp<as, cau^e, [5&, a, result , d£1 ^c^uee, [2, rev] ) . 

semp (attributable, cause, tattributabl&, to] , cau^e , [S/rev]) . 

semp (attributed, cauae, [attributedr to] ,cauae, [2, rev]) . 

aemp (baaed, cause, Ibased.onJ .cause, [2,rev)> - 

sectfp(baa^,c^use, [based, upon] , cause, [2rrev]> . 

ee^iip (b^^iauae, cause, [because, of] , cause, (2,rev]> . 

eemp (convey, signal, (conveys, a, signal], signal, [def)). 

eemp (conveyed, signal, (conveyed, a, signal], signal* Jdef 1 > . 

gemp (conveying, signal, [conveying, a, signal] , signal, IdefJ). 

gemp (conveys^ signal, [conveys, a, signal] , signal , [def]). 

i3$<np(diaaociate, release, [dieaociate, from], release , [def ]) , 

aevnp (diaaociatedc release, tdissociated, f rom] , release, [def ]) , 

semp {dissociates, release, [diseociates , from], release, (def ]) , 

seinp {dissociation, release, [dissociatiOA, from], srelease, [def 1 ) , 

seinp{down, signal , [dovn, ' - ' . regulate] , signal , [def] ) . % A dQwn- 

regulates B A B 

sewp (down, signal r [dowft, ' - ' , regulated] , signal^ tdef 1 ) . % A down 
-regulates B A B 

eemp (down, signal, [down, • - ' , regulates] r signal, [def] ) . % A dovm 
-regulatea b a B 

semp (down, signal, [down, • - ' , regulation] , signal, [def] ) . % A dow 
n-xegulatea B A --> fi 

d^mp (due/cfause, [due, to, the ^ fact , that] , cause, [2, rev] } . 

i^empCdue, tfaaae, [due, to] , cause, 12, rev] ) . 

aempiform, attach, [form, complex], attach, [def ] J . 

a ennp (formation, attach, [formation, of, cQcnplexJ , attach, [def]}. 

sernp(formed, attach, [formed, complex], attach, fdef}), 

semptforms, attach, [forms, coTupLex] , attach, [de£l). 

semp(had, cause, [had, an, active, role, in] ,cau&e, [def]) . 

sempfhaa, cause, [has, an, active , role, in] .cause, [i3ef ] ) . 

semp{have, cause, [have, an, active, role , in] , cause, tdef]) . 

semplie^ cause, [is, arineanSrOf , producing] , cauae, [def 1 } . 

semp (is, cause, [is, due, to] , cauae. [2. rev] ) , 

Bemp( functions, inactivate , [ funct ions, ae, a. negative , regulator, of 1 r i 
nactivate, [def]) . 

eemp (f untftion, inactivate, [function^ as, a ^ negative , regulator, of ) , ina 
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sst^mp im^i^iat&6 f signal, 
A mediates a si^rtal to 



ctivate, f^ef 3 } . 

aemptleacJ, caue* , [lead, to] . cause, [d6fi ) . 
&eTnp(lead,cauaei, [lead^tol .cauae, [def] ) . 
semp (leading, caxjse, [leading, to) r cause , idef] ) . 
semp( leading, cause. [leading, to) , causer tdef] ) . 
semp( leads > cause, [leads, to], cause, [def] ) . 
semp (leads. causel , (leads, to J .c^use, [def]) . 
sertip ( led , cause , [ 1 ed , to] , cau»e , lde£ ] > . 

semg(may, cause, [may, be. reapoasible, for] , caua&f Idef] ) . 
©emp(Tn$di^t6,' aignai, (mediate, a^ signal) ^ signal, [def}). %A 
mediates d aignil to E 

[«nediated, a, signal], signal, [def]). % 
B 

[mediates, a, signal], signal, tdeE] ) . % 
k mediates a signal to E 

eerap {mediation, si9nal, (rrediation, of , a, signal], signal, fdef])- 

%A mediates a signal to 0 
a&mp(n, createbond, [n, ' - ' ,acetylate] , ' M-acetylate' , [def ] J , 
afemp(n, Greatebond, [n, ' - ' , acetylated) . 'N-acetylate' , {det]} . 
^ecrt^frt, createbond, [n. ' - ' , cEcetylatee] , 'N-acetylate ' , [def]) . 
aemp(n, createbond, [n, ' - ' , acetylation] , *N-acetylate ' , [defl) . 
aeT[np{n, createbond. fn^ ' - ' , acylate] ^ 'N-^cylate ' , [def]), 
3emp{n, createbond, (n, ' - ' ,acylated] , 'W-acyl3fc$' , Idef] ) . 
saitipln, createbond, [Or ' - ' ,acylat$a] , 'N-acylate' , [def] > . 
sempfn, createbond, fo, ' - ' , acylation] , u^-acylats' , [def] ) . 
©^mp(n/ cireatebond, [n, ' - • .glycosylate] , 'W-glycosylace ' , [def] ) . 
&emp{n, createbond, [n, ' - ' .glycosylated] , ' M -glycosylate ' , [defl > . 
^eTOp(n, cireatebond, Cn, ' - ' r glycosylates] , 'N-glycosylateS tdef J ) , 
9eTnp(n^ <!i:«atebond , tn, ,glycosylation3 . '^3-glycosylace • , [def]} . 
eeTnp(n,brea]d3ond, [n, , terminal , pa^oteolys is] , ' n- terminal proteoly 
sis' , [def] ) . 

lo. ' - ' , glycosylate] , 
[o. ' - • , glycosylated] , 
[o, • - ' , glycosylates] , 
[o. • - • ,glycosylation] 
aernpi only, time, [only, after] , 'only after' , 
semp (prolyl, createbond, [prolyl , ' - ' , 4 . ' - ' , hydroxylatej , 
'prolyl -4-hydroxylate' . [def]} . 
createbond, [prolyl, ' - ' . 4 . ' - ' , hydroxy lated] , 

'prolyl -4 -hydroxylate' , [def] ) • 
createbond, [prolyl, ' - ' . 4, ' - * , liydroxylatee] , 

' prolyl -4-hydrDxyiate' , [def]) . 
createbond, [prolyl , ' ' , 4 , ^ - ' , hydroxylation] , 



mp ( o , cx&A t ebond , 

semp io , createbond, 

semp ( o , createbond^ 

& emp ( o , cr eat ebond , 



semp {prolyl, 
semp (prolyl , 
semp (prolyl, 



'^-glycosylate ' , 
' 0-glycosyi ate * 
'0-glycosylate ' 

/ '0- glycosylate 

(2, rev]) . 



[defl) . 
. fdef ] ) , 
, {def J ) , 
'.[def] J, 
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'prolyl- 4 -hydr^ixy lace ' , [def 1 } . 
eemp(reflult, cause, [result, frocnl .cause, [2, rev] ) , 
9einp(reBult/ cause, [result, inl .cause, [def I ) . 
seinp {resulted f cauBe, [resulted, from] , cauee, [2 r rev] J. 
semp{ resulted, cause, Iresulted, in] , cause, tdef] ) , 
semp (resulting, cause, (resulting, from) , cause, (2, rev] ) . 
semp (resulting , cause, [resulting, in] , cause, ItSef}) . 
.5emp(reeults; cause, [result a, frotn] , cause, fS^revj) . 
5i?mp (results, cause, [results, in) , cause, Ldef}) . 
6emp(aet, release, (set, free], release , [def ] ) . 
semp(9et, release, [set, free], release , [def ] ) . 
e^mptseta^ release, [sets, free], release , [def 1 ) . 
eempt&ettingj releaaSf [setting, free], release , [def 1 ) - 
semp( suppress / inactivate, [suppress, activity, of], inactivate, [ 
def]}, 

Bemptsuppreeaed, inactivate, (suppressed, activity, of], inactivat 
[defl). 

aemp (suppresses f inactivate, I&uppr^eeee, activity, of], inactivat 
e. Idef ] } . 

semp < suppress ion, inactivate, [euppreaeioiif of , activity, of], inac 
tivate, (defl } • 

eesnp (switch, activate, [switch, on, the, activity, of], activate 
, (defj). 

^emp {'switched, activate, [switched, on. the, activity, of], acti 
vete, [def 3 ) . 

sempt switches, activate, [switches, on, the, activity, of], acti 

vate, [def] ) . 

semp (up/ signal r lup, regulate 1 , signal, [2, rev]}. % A up-regul 
atee b b a 

eemp (upj signal , [up, • - ' , regulated] . signal , [2 , rev] ) . 
^emp(up, signal, (up, ' - ' , regulates] . signal, [2. rev] ) . 
eemp (up, si<5nal , tup, regulation] , signal, [2, rev]) . 
Bemp(>^^g, cause, [was, a, means, of , producing] , cause, [def] ) . 
Becap(w&e,cauee, [was, due, to] , cause, (2,revl) . 
sesnpCwere, c&use^ iwere,a,nieans , of , producing] , cause, [def]}. 
aeittp (were, cause, [were, due. to] , cause, (2, rev]) . 
% semw(Word, SemanticClass, Target, Features): single-word semantic lexicon entries.
semw(acetylate, createbond, acetylate, [def]).
semw(acetylated, createbond, acetylate, [def]).
semw(acetylates, createbond, acetylate, [def]).
semw(acetylation, createbond, acetylate, [def]).
semw(activate, activate, activate, [def]).
semw(activated, activate, activate, [def]).
semw(activates, activate, activate, [def]).
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semw (activation, activate, activate, (def J ) . 
semw(add, attach, attach, [def]).
semw(added, attach, attach, [def]).
semw(addition, attach, attach, [def]).
semw(adds, attach, attach, [def]).

semw(after, time, after, [2, rev]).   % temporal relations

semw(aggregate, attach, attach, [def]).
semw(aggregated, attach, attach, [def]).
semw(aggregates, attach, attach, [def]).
semw(aggregation, attach, attach, [def]).
semw(arrest, inactivate, inactivate, [def]).
semw(arrested, inactivate, inactivate, [def]).
semw(arrests, inactivate, inactivate, [def]).
semw(associate, attach, attach, [def]).
semw(associated, attach, attach, [def]).
semw(associates, attach, attach, [def]).
semw(association, attach, attach, [def]).
semw(attach, attach, attach, [def]).
semw(attached, attach, attach, [def]).
semw(attaches, attach, attach, [def]).
semw(attachment, attach, attach, [def]).
semw(bind, attach, attach, [def]).
semw(binding, attach, attach, [def]).
semw(binds, attach, attach, [def]).
semw(block, inactivate, inactivate, [def]).
semw(blocked, inactivate, inactivate, [def]).
semw(blocking, inactivate, inactivate, [def]).
semw(blocks, inactivate, inactivate, [def]).
semw(bound, attach, attach, [def]).
semw(break, breakbond, 'break bond', [def]).
semw(breakage, breakbond, 'break bond', [def]).
semw(breaks, breakbond, 'break bond', [def]).
semw(broke, breakbond, 'break bond', [def]).

se^nw(bro)cen, breakbond, ^ break bond' , [def ]) • % caee without break 
bond 

3emw{cataly»at ion, promote, catalyze, [def]). 
eeiTiw (catalyT-e , promote r catalyze, [defl) . 
semw (catalyzed, promote, catalyze, [def] ) - 
a&mw (catalyzes. promote, catalyze, [def] ) . 
a&tnw (catalyzing, promote, catalyze, [def]) . 
e&tcM ( cause , cause , cauae , Ide£ ] > . 
eemw (caused, cauae, causer [def ) 
Bemw (causes. caua$,cauaer [def]) . 
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ai^mw (cleavage , breakbortd, 'break bond\ (def 1 ) , 
aemw (cleave, breakbcn^r 'break bond* , [def ] ) , 
aetnw (cleaved, breakbortd/ 'break bond (d^f J > . 
aemw (cleaves, breakbond, "^break bond' , Idef)} . 
aetnw(cDirmnunoprecipitat^. attach, attachr Id&f]}. 
semw{coirmiunopr«cipLtated , attach, attach, [def]). 
seraw{coicnmunDpj:eeipitatea, attach, attach, [defl). 
3emw{coimTnunop3r&cipitciti.<:>n ^ attach, attach, [def]) . 
s<5raw{coTnbinatioft ,attach, attach, [dftf] ) - 
semwicoinhine , attach, at tach, tdef ] ) . 
semw {combined , attach, attach, [def] ) , 
serawicoTObinegt, attach, attach, [c3et) > 
semw {conjugate , attach, attach, (def 1 J . 
semw (conjugated , attach, attach, [d^f] ) . 
semw (conjugates , attach, attach, (def] ) . 
semw (conjugation r attach, attach, {de£l>- 
semw (connect > attach, attach, (d«f]}. 
semw (connected , attach, attach, Idefl). 
semw (connection , attach, attach, [def] ) . 
semw ( connect attach, attach, [def 1 ) . 
eemw (constrain, inactivate, inactivate, (def 3 > . 
Bemw (constrained, inactivate, inactivate, [def]>. 
Bemwfconetraina, inactivate, inactivate, [def 1 ) - 
eemw (constraint , inactivate, inactivate, [def 3 ) . 
Bemw(coprecipitate, attach, attach, Idef]} . 
BEmwtcoprftcipitated, attach, attach, fdefl) . 
afecnw(coprecipitates, attach, attach, [def ] J . 
aemw (coprecApitation , attach, attach, [def]) . 
aemw <copurif ication , attach. attachr [def 1 ) , 
semwicopurif ied , attach, attach, [def 1 ) . 
semwtcopurifies, attach, attach, [def] ) - 
seniw{copurify , attach, attach, [def]), 
semw {couple , attach, attach, [def] ) , 
semw (coupled, attach, attach, [def] ) . 
semw (couples, attache attach, Idef} ) . 

semw (cut, breakbond, 'break bond* , [def ]) , % leave bxeakbond onl 

senn^(cutg, breaJcbond, •bi'eak bond' , [def] ) . 
ijemw (deactivate, inactivate, inactivate, [def D ) . 
eemw (deactivated, inactivate, inactivate, {def ]) . 
Becnw (deactivates , inactivate, ictactivate, (def] ) . 
Bemw (de^ctivati^Hj inactivate, i.naCt ivate, [d^f ] } , 
semw (death, pxoceBe, death, [1] } . 
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semw(deTuethy laced, breakbortiS, demethylate, [def ] ) . 
se-mwCdeiuethylates, breakbond, decnethylate/ (def]) . 
semw (demethylation, breakbond. deroethylat^r [def] ) - 
senifw(dephoBphory^ate, breakbond. dephoephorylate. Cdef ] ) . 
eeww(dephosphDry laced, breakbondr dephoephorylace, [def] ) , • 
eemvf {dephosphorylates, breakboiid, dephoephorylate, Idef 1 ) . 
eemw{dp.pho^phorylationr breai^bond, dephosphoxylate, [def 1 ) . 
sew (die, procesB, death r [1]) . 
»e]rtv/{di«d, proceas, death, [i]> . 
semv/(die3r proceBs, d^sth, [llJ. 
seinw{dieapsemblfe, release, release, [def] ) . 
6emw{di0aeseiTiblfed, releaser release, [def 1 ) . 
semw (diaaBeembles^ reilease, release , [def ]) . 
semwidiaaBBembly, r^l&ase. release, fdef] ) . 
semwtdischaTrgfe, f^l^Ase, release, [def J ) . 
semw (discharged, release, release, [def] ) . 
semv.' (discharges, release, release, [def ]) , 
eemw (disengage, releaser release, [def]} . 

(disengaged, release j release, [def] ) , 
aetrnx" (disengageraentr releaser xeleae^, [de£] J . 
setrnv (disengages, release, release, Ld^i) ) • 
afemw<divider breakbond, 'br^ak bond', [def]}. 
sernw (divided, breakbond, ^breelc bond', (def 1 ) . 
Sftifiw (divides . breakbond, 'bre&k bond', [def] J . 
semw{diviBiOT, b«akbond, 'break bond' , Idef] ) . 
seinw{dying, proc«s3, deatl^^ [1]) 
semv/ (enhance r procnote , promote , [d<5 f 1 ) - 
semy/ {enhanced, prompt's J promote, [def 1 } . 
semv; {enhancement, promote, promote, [def 3 ) . 
Berovf (enhanoea, promote .proTODte, Idef] ) . 
©emw [ enhancing, promote, promote, [def)) . 

^eTnw(expreas, generate, express r [def]) . % .can have either 1 or 2 ar 
gumenta 

aem^v (expressed, generate ^ express, [def ] ) . 
aemih' (expresses , generate , express, [def] } , 
aemw (expressing, generate, express . tcief 3 ) . 
eernw (expression, generate r express, [def] ) , 
semw (generate, generator generate, [del]} - 
secnw(generatedr generate, generates, f.de£] ) - 
3ecnw(generateBrgwer£itejgene3:at6, (defj) . 
seitiw {generating, generate, generate, [def]) , 
semw {generation, generate, generate, [def] ) . 
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hew, breskbond, "br^ak bond' , [def]} . 
hewed, braakbond, 'break bond' r [def] ) , 
hews, fcreakbond, 'break bond' , (def 1 ) . 
hinder^ inactivate, inactivate, (def ] ) - 
hindered, inactivate, inactivate, [d^l] ) - 
hinders, inactivate, inactivate, [def]) . 
hindrance, inactivate, inactivate, [def]). 
inactivate , inactivate, inactivate , [def] > . 
inactivated, in^ictivate, inactivate, Idef 3 ) . 
inactivates, inactivate, inactivate, tdef ) ) . 
inactivat-ion, inactivate, inactivate, [def] ) 
incite, activate, activate^ [def] ) - 
incited, activate, activate , [def )) . 
incitement, activate, activate, [d^f 1 ) . 
incites, activate, activate . [def )) . 
induce r activate, activate, [def] ) , 
induced, activate, activate, [c^ef]] . 
induces, activate, activate, (def)> - 
induction, activate, activate, [def]K 
influence, activate, activate, [defl ) . 
influenced, activate, activate, [def ]) . 
influences, activate, activate . [def ]) . 
influencing, activate, activate, [def]), 
inhibit, inactivate, inactivate, [def]), 
inhibited, inactivate, inactivate, [def] ) . 
inhibition, inactivate, inactivate, Jdef ] ) . 
inhibits, inactivate, inactivate, [de£]) . 
initiate, activate, activate, [de£] ) . 
initiated, activator activate j [def] J . 
initiates, activate, activate, [d€f]>. 
initiattiottj activate, activate, [def ]> . 
inetigate, activate, activate^ [def]}. 
instigated, activate^ activate, [def J) . 
in&tigate^, activate, activate, [def ) ) . 
in&tigation, activate, activate, [def] ) 
interact, interact, interact , Tdef J ) . 
interacted, interact, interact, [defl>. 
interaction, interact, interact, fdef] ) - 
interactions , interact , interact , [def] ) . 
interacts, react,- interact, [def] > . 
join , attach, attach, [defl) . 
joined , attach, attach, [def ]> . 
joining, attach, attach, [def 1 ) . 
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semw (joins, attach, attach. [def] ) , 
semw (juricture, attach, attach, [defl ) . 
aemw( liberate, reiease, release. [def] ) . 
£?-Fsrrr*; (liberated, release, releaser Idef} ) , 
5ecnw(lil>eratea, release, releaser [cSefl) ^ 
serow{ liberation, xeiease, release. [def]) . 
6eww{limit. inactivate, inactivate, [6et] ) . 
aeinw{limit:ationr inactivate, inactivate, Idef] } . 
seffiwUimiced.. inactivate, inactivate , (cSefH ■ 
semwdimits, inactivate, inactivate, Idef]) , 
semwd ink, at tptch, attach, [def 1 ) . 
aemw(link6<i,att5kch, attach, [def J) . 
i3fetnw{ linking, attach, attach, fdef J ) - 
a^inuf{linka J attach, attachr Idef J > . 
6emw{medi^te. promotft, prowiote, (def ] ) . 
i3emw(mediat€^3, pronvote, promote, Idef] ) . 
eemv; (mediates, promote, promote, [def J) . 
eemw Icnediation, promote, promote, [dftfl > . 
peTTrfCmethyiat^ < createbond, methylate, Cdef J )' . 
afemw (methylated, createbond. ?nethylate, [defl> . 
aewiwfmethylates. createbond. methylate, [def ]) . 
semwlmethylation, createbond, methylate, [def]) . 
seww (modification, modify. modify, [def]) . 
semw (modified, modify r rnodify, [de£]) . 
semw (aiodifie5, modi fy, modify, [def] ) . 
eemw (modify. modify rTTiodify, [def J ) . 
9emw(TCJodifying,niodify, modify, [def] ) , 
eemw (mutate. modify, mutate, [l] ) - 
aerm* (mut s ted, modify, wutat« f [11}. 
sentw (mutates, niadify, mutate r [11 > ^ 
semw (mutating, modify ,mut ate, [1]) . 
seTOW (mutation, modify, mutate. [1] ) , 
serRiw(overexpreBBed, generate, over expreee. [def 3 ) . 
seim? ( o verexpr e s ses , ge nera t e , o ve r expr ess, C def ] ) . 
senYw(overexpressing, generate roverexpress, [def] ) - 
semw (over express, generate, express, [def] ) . 
gemw (overexpresaion, genera t e. overexpreas, [def] > - 
semwCpairr attach. attach. Cdef]) . 
semw (paired, attach. attach, (defl) . 
semw (pairing. attach, attach, (def ] J . 
semwCpaira J attach, attach, [def}) . 

a emw ( phosphor yl at createbond, phpaphoxylate , [defl ) - 
a6mw(phosphoryl^t€d, cr^atebond, pho^phorylafce, [def]). 
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s^mw 

B^tnw 
semw 
semw 
sernxv 
Bemw 

semw 

A ; 
semv/ 
semw 

^emw 

semw 
se^nw 
sew 

semw 
semw 
semw 
semw 



phosphorylate^, createbonci, phosphorylaite, [def ] ) . 

phosphoTylation, createbond, phosphorylate, {def 1 ) . 

prec de, cause, cause, Idef)) . 

precededr cauae, cau^e, [def)). 

precedes, cause, cau^e, [def)). 

preceding, cause, c^uee, [de-£)K 

promote, pi'omote, promote, (def]) . 

promoted, pr ©mo te.prortiot 6; Fdefl ) . , 

promotes, prcriTiot^, promote^ Idef) ) . 

promotion, promote, promote, {def]> . 

prompt, activate, activate, [defl ) . 

prompted, activate, activate, [def] ) . 

prompting, activate, activate, Idef] ) . 

prompts r activate, activate, [de^I J - 

r&actr react, recoct, [def] ) - 

reacted, react, react, [def] } . 

reaction, react, react, [def 1 ) , 

reactions, react, react, [def 1 } , 

react a, react, react, [def} ) , 

regulate, signal, signal, [def] ) , 

regulated, signal, signal . [def ]) . % B i$ aregulat^d by 

. B 

regulates , signal, signal, [def]) , 
regulation, aignal, gicfnal, [def] ) . 
releriBe, releAfie, rel*afl&, [def J > . 
released, release, release, Idef] ) . 
releases, release, release, [def] ) . 
removal ^ breakbond, ^ break bond ' , [def] ) . 
remove, brea}d3ond, 'break bond [def]). 
remove, breakbond, 'break bond 'rldef]), 
removes, breakbond, 'break bond [def]) . 
replace, substitute, substitute, [de£] ) , 
replaced, substitute, substitute, [d^f ]) . 
replacementa substitute, substitute, [def)) . 
replaces, substitute, substitute, Idef]). 
repress, inactivate, inactivate, [def]). 
repressed, inactivate, inactivate, [def]). 
represses, inactivate, inactivate, [def]). 
repression, inactivate, inactivate, [def ]) - 
require, cause, cauge, [5, revj } . 
required, cause, cause j (2, rev] ). 
requirement, cauae, cauae, [2, rev] ) . 
requires, c^use, cause, (2, rev) ) _ 
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s&mw {requiring, cause, c^y^a, (2,revl ). 
i5emw{xeetrain, inactivate, inactivate, [dftfl } . 
Bemw{r^^trainecif inactivater ii^activate^ [<iefl> * 
se?nw{ restrains . inactivate. Inactivate , {iJefl ) - 
setnw (restraint , inactivate, iTiactivste, (d^fl ) . 
feern* (sensitization, activate, activate, [defl ) . 
sertiv/ (sensitize, activate, activate^ [d&f ] ) . 
senwC sensitized, activat<£, activate, Idef ] ) . 
semw (©ensitizea, activate, activator Ic^ei] ) , 
siemw (s^jjarate^ breakbondr 'break, bond' , (def j ) . 
feeftiw (separated r breakbond, 'break bond', (def J) . 
£eniw( separates, breakbond, 'break bond*, Idefl) . 
semw (separation, breakbond, 'break bond' , Ldef) } - 
semwtseverr brealcbond, 'break bond [def ]} . 
semw( severance, breakbond, ' break bond' , [daf] > - 
©emw (severed, breakbond, 'brsak bond », {def ]> , 
©enw (severs, breakbond, 'bsceak bond^ [def)) . 
semw (signal, signal , ^^ignal , [def] ) , 
semw (signaled, signal, si^n^l , [def ] > . 
sermtf (signaling, signal, signal, [d^f ] ) • 
serm\! (signals, signal, signal , [def ]) . 
semw (split, breakbond. 'break bond' , ldef] ) , 
semw(splitB, breakbond, 'break bond [def] > . 
semw (apl it ting, breakbond, 'break bond' r [def}) . 
eemw(stirnulate^ activate, activate, [def j ) . 
eemv/(fltimulatad, activater activate, [def] ) . 
afe!nv/{ at itnulates, activate, activate, [def] ) . 
lifemv/ (atiroulation, activate, activate^ (def J ) . 
aemv/ (aubatitute, substitute^ substitute, [def J ) . 
Bemv/ (Bubstitut&dr substitute, substitute, [def 1 ) . 
aeT0w\*6ubstitotea, Gubstitute, substitute, [def I ) . 
aernw [substitution, substitute, substitute, Idefl). 
aemwC suppress, inactivate, inactivate, [def] ) . 
semv; (suppressed, inactivate, inactivate, [def)) . 
semv/ (auppressee, inactivate, ici^ctivcitBr [del ] ) ■ 
aemv*' (suppression, inactivate, inactivate, lde£] ) , 
semw (tie , attach, attach, [defl ) , 
semw (tied^ attach, attach, [defl} - 
semw (ties, attach, attach, [defl>- 
serrw (transcribe, generate, transcribe, [def] ) . 
senw (transcribed, generate, transcribe, [def]) . 
semw (transcribes, generate, transcribe, [def]) . 
semw {txan^cxibing, generate, transcribe, [de£] ) . 
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senn^Ctranscrlption, generate, transcribe, Idef]) , 
semw(ubiquitini£e, createbond. ubiquitinize, [def 1 ) . 
semiw(ubiquitini2e, createbond, ubiquitinize, tdef ) ) . 
sernw (ubiquitini.zed, createbond, ubiquitinize, [def ] ) . 
aenwtubiguitinizesr cr^atabond, ubiquitlrti^e, [d^f 1 ) . 
afecnwCurge, activate, activate , [def J ) . 
eesnwCurge, activate, activate, tdef 1 ) * 
aemwforged; activate, activate, [def ] ) . 
st^TOw furgea, activate, activate, Idef) > . 
semw (urging, activate , activate , (def 5 ) . 
semwfforin, attach, attach, [def])- 
semw (forms, attach, attach, tdef)) . 
seinw(forTnedr attach, attach r [d6f]> . 
&eTnw{ for Tiling, Attsch, attach, ^d^f } J . 
3eTOw{f orraation, attach, attach, Cd«£] ) . 
a9Tnw{a9Bemble, attach, attach, [def ] > . 
semwfaBBerabl^s, attach, attach r [def] ) - " 
©emwIasBembl^d, attach, attachr tdef] ) , 
s€TTvw(asBeTOblingr^ttachr attach^ [def 1 } . 
semw(aseeinbly, attach, attach, [def 1 ) . 
$€rnw(diB$ade$TAb2ef release I r&leaae, [def]} . 
eemwtdiee^es^^mbl&a, releage, release, [def}) . 
s^Tiiw(diBs^a$ettibled, releag^, release, [def)) . 
s&entii (diasaasembling, release, release, (defl) . 
semwCdiasassemblyr release, release, [def]) . 
semw (dissociate, release. release, [def] J , 
semw (dissociates. release, xeleaBe, Idef]) , 
semw (dissociated, release, reieaeer [def]) . 
aenw (dissociating, release, release, [def] ) . 
aemw (dissociation, release, release, Idef] ) , 
semwtrecruit, attach, attach, (def) ) , 
seiYKW (recruits r attach, attach, [def]) . 
secnw (recruited, attach, attach, [def] ) . 
Betnwfrecnjiting, attach, attach, [dHf]) . 
Bemw (recsruitinent , attach, att<ach, [def] ) . 
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\ «dit&d G£non>9 grammar - adapted frem MedLE^'e grammar for ud« with MedLSB 
% this is to be used along with the genomics lexicon of substances, actions,
% and relations.
% revised March 16, April 5, 2000

% adjusted for tagged input

iimltifile (wd«</i) . 
i' multif ile < phrase /S) . 

% 

% Written by Carol Friedman for the MedLEE System *
% Queens College of the City University of New York *

% Highest Level Predicate - sem_sent - 1st arg. is target structure
%                                    - 2nd arg. is a list of words in sentence
%                                    - 3rd arg. is '[]'
% Target structure: a frame or set of connected frames:
%    the frame describes an action or several related actions;
%    an action frame is a list consisting of the symbol 'action'
%    followed by the code for the action and arguments;
%    the arguments are either substances or actions;
%    each substance slot consists of the name of the type of
%    substance followed by the value for the substance;
%    the substance slot may contain slots for several substances.

% Examples:
% Blocking of IL-2 gene transcription by activated rap1.
%    [action, inactivate, [protein, Rap1, [state, active]],
%        [action, transcribe, [x], [gene, interleukin-2]]]
% The adapter protein crkl was associated with both phosphorylated cbl and the
% guanidine nucleotide-releasing factor C3G.
%    [action, attach, [protein, Crkl],
%        [relation, and, [protein, Cbl, [state, phosphorylated]],
%            [protein, guanidine nucleotide-releasing factor C3G,
%                [state, phosphorylated]]]]

% fail on unknown predicate
% NOTE(review): the leading ":-" is not visible in the scan; it is restored here
% because the comment above describes a directive - confirm against the original.
:- unknown(_, fail).
:- op(900, fy, [not, once]).   % same priority and type as \+
:- Qp(700, xfK, [\-,-«]). ^ same priority and typtj aa = or — 

% snoop isK generally need to find inpuc string vhBn MSting a DCO 
% the input atrzA^ uaed for coAStraints 

sem_aent {P , g<wlia t ^ X) - • > 

{Mfliert {addatotal (OM ) . 

a«injparfl6(P,fteroll8t,x} . 
a*m_jiiarae (Target r Semi Uei 

aani^attern<<P,SeniliBt) , 
aeiajparae {Target, SemliatiX) •«^> 

s»ni^attern6{p,Semli*t) , 

sem_endornot Target . xi . 

Bero__p*rae< [faiLure] ^^rX,_,__i 
addatotal (X)T 

af«»_endornottPrPiX> % P ia t^J^get if th*r« is an endmark 
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{a^atotaKX^ } . ^ y is CiucnbeT of tines Attached exidinarVc 
seoi^ftndorn^c : - % did not reach endsA&rk; update cwtie and f«il 

" viptotal, failT 
eemjendom^t [f ailur^l . X, _ J ; - 

«dddtatalO(> , % K is numbei: o£ ticn«$ reached 

X >* 50. 



% Finding p^tterna 



Bem_pattQxns Semi let) - -> 
pate«rn(Fl, 5einll3&> , 

{Pl \= % lat finding ehool-d noC be empty 

morep6tb^e^yi(s, P2, semlift? , \ connected paCderna 
(getrclatlon (R, Fl , F2 , ) . 

% The action pattern types are: pattern, nounactionpatt, actpatt, and
% nounactpatt.
%    pattern --> actionarg(A1),
%                active or passive verb
%                actionarg(A2).
%    pattern --> nounactionpatt.
%    pattern --> actpatt.

% pattern is saved in a symbol table (st); check for success/failure 1st
% case where pattern is in st and has been successful
pattern(Fmt, _) --> checkst(pattern, _, s, Fmt).
% case where pattern is in st as a failure.
% The cut followed by fail keeps the later pattern clauses from being tried.
pattern(_, _) --> checkst(pattern, _, f, _), {!, fail}.

^ pattern. $'.*an action pattern, \jith a nocninal verb 

* P*a, cleavage by svad, 

% apopto a 1b -Induced cleavage of by zDfiV&. 
pa t te arn-l. P , Swcctl i • t ) - - ^ 
so4>cp(S0, so) , 
{ \+ checkBttpatterrt, 5,_^_^S0,_) , 
act:ionchkO«mli»t) }, 
ncunactionpatt (F) , 
snoop , 
( «dd8t(pattem, 5,a.7,fl0,s) 



% pattern 1: an action/substance acts on an action/substance
% the activation of rap1 inhibits the expression of il-2
% rap1 functions as a negative regulator of tcr-mediated il-2 gene
% transcription.

pattern (F,$enilifit^ anoop(BO,SQK % SO is tha input string^ 

{ \+ chsckst (p*fct«m, ir_r^, S0,_) . 
actioncbk<5eiiiilSat) , 
canciect;eh)t (Semllat } } , 
actiianarg lAl) , 
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cCunnectACt tseiso, tv. vp^v©d3 ^Tarfiifet. Pt^tyre^J , 

snoo^ ( S , 5 } , % en44 sentence Hat 
{ member <dBf, Feature* J, 

cnodlist nM,A3,£it8] ,Mods) ; 
wetribej: Irev, Paatures) ^ 

f rame (f,*cc ion, Tavgftt, Mods) , 
addst (p«tt«ro, l,s, F,SQ,S) 

}- 

% pattern 2: an action/substance was acted on by an action/substance
% The aggregation of bad was suppressed.
% The aggregation of bad was suppressed by the phosphorylation of Jnk.
% Grb2 was associated with cbl.
% Apoptosis-associated cleavage of endogenous PS1 was blocked by the
% treatment with zVAD.
pactamty, 5*11)1 tac> 

sn«70p f&O,£0> , % 80 is the input dtk^lng 

act ionchHC Semi iat} , 
connvot ohk ( semi 1 a t > ] , 

Bem_beteml_J , ^ waa 

connectact (sem, IvanJ , T*rsfet . PeatuT»»J , %activ«fced 
o©tbyarg<Ai| . 

ftnoop(&,d)r lending eentenoo llat 
{ (member (d«i. Features), 

TfVPdil5tnAl,A3,SiCe) .Mods) 7 
rfrember (ifiv^FTe*tuxe&) <• 
tnodlist ( [A2,A1.. Site] r Mods) f , . 
f ram* ^F, action. Tarmac, Moda} , 
AddBt (pattern, 2 , s ^ &0 , S> 

}- 

V pattern 3; an action/substance acted ^ an accion/*^^batanc« 

% %)«d induced phoaphoryLAtion o£ fyn, 

t ccr and cd2a-ra*6i«eed 11- S tr«n»crl|^tiQn, 

pattern IP, S*Tnllft) 

{ \+ chfickflt (pattern, J*_i_jSO,_}, 
actianchk(5«i«Ll(&t) , 
connectchk^Semliat^ }, 

actionAr^tAl) , t 8u]»frt«r^ce or basio action 
t optda«h, 

connectacta (awn. [vp , ven, vod] , Target , Featured) , ' * activated ' 

s optof, 

actionar9(A2} , % bad pattern here 
8iioop{-B,S) r 
( imfttrtbeaf (def / Feature-s) , 

wodliatt [Al,A2rSit«J .frloda) ; 
ittetnher (rev, Feature a) , 
modliati tA2>Aa,SiC0] ,«Qds)>, 
frame (P, act ion, Tiir^eCr Mods} , 
addst(p«Cte)rnr9^0rF.&O. 6) 
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\ pACtern 4: a siiHple action pattern with an active v«rb. 
fc Activated R«£-l phoaphorya«teB MEK-l. 
pattern IP, aemlist) 
snoop (90. fiOJ , 

Rebeck -that sentence haa nt^ action word/phrase 
{ \* chBcfcftt (pattern, 4,_,_rSD,_). 
actioncHkCSeinliat} }, " ' 
actpatt IFJ . 

{ addac(pattem,4,9,P, S0,€» 



% no more patterns - save failure
% Records a failure entry (number 0, status f) and then fails via cut-fail.
pattern(_, _) --> addst(pattern, 0, f, _), {!, fail}.

* sem_fr»or«pAttern (-Rel, -P, 4Semlistr -^S? : 
% Hel is a relation and its value fromer 

\ P is th© r«Tn*inirtg patternB^ Sco^liat ie th© ii.€t of Baiw^fltic claaBee 

% in aentcrtC« 

% if havB a *«rleH of \'Br ub» the relation ''aud" or "or* i£ in tho tiftst 
t M<Si ft^K« thdt the r^iatioci 
morepatceYTi<RrP.9«TTalst> — > 

sem^re-I^ktion (Ri^Mcxli) , %r«lation and cnodLfiera 

C«w_3C>attema (F^SamliatJ j 

{( f rame f?^rcl.Conj2,_) J .% P c^ontaiisg nested relation 

(Conja - and? Con^^ - or> , frame <Ri,ral, , % Rl relation txAina 
Cramfi (ftjY«.l,conj2,_) * value of T^lation iB"'Con32 

Kl \= (] . \ where do Typ^r^ Value and Mods 2 cnntB from? 
fr«im« (Rl, Type, value, r4od2] , \ get compdn^nce of original r«J»tian 
mergemodB (nodi ^TfodS , Hodal , 
{ ModB = il, fi:amfe<Rf rel.valu*, CI) , !? 

%fraina(|i,^«l, [valuelKoda] , [] ) % make it r&X connsctor with r«l ffod 

R - Lrel, [Value|l>tods]] 

) 

)• 
). 

% no more findings
% Base case written as a plain clause: relation and pattern are both [] and the
% input list is passed through unchanged (S to S).
morepattern([], [], _, S, S).

% actionarg is the argument of pattern
% actionarg is either a substance or a basic action
% actionarg is saved in a symbol table (st); check for success/failure 1st
% Case where actionarg is in st and have been successful
actionarg(A) --> checkst(actionarg, _, s, A).

% Case where actionarg is in st as a failure.
% The cut followed by fail keeps the later actionarg clauses from being tried.
actionarg(_) --> checkst(actionarg, _, f, _), {!, fail}.

% actionarg 1: a substance or substances
% Rap1, active Rap1, Cbl and Crkl
actionarg(A) --> snoop(S0, S0),          % S0 is the input string
    { \+ checkst(actionarg, 1, _, _, S0, _) },   % skip if case 1 is already in st for S0
    substances(A),
    snoop(S, S),                         % S is the remaining input
    { addst(actionarg, 1, s, A, S0, S) }.
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I actioftarg 2: a pxo'cese like apc^ptosis, or a (Siaflas© 
acticiji*rg (A) — p snoop (SQ, SO) , \ SD is the input atfing 
( checkat [Ai3tionarg,2._,^,S0r.) }r 
procQaapatt CA) , 

{ add:st tactionax^/S, ArSO. 9) 

}. 

% actionarg 3: a nominal action pattern
% Etoposide-induced apoptosis.
% Etoposide-induced PS1 cleavage by z-VAD.
actionarg(A) --> snoop(S0, S0),          % S0 is the input string
    { \+ checkst(actionarg, 3, _, _, S0, _) },   % skip if case 3 is already in st for S0
    nounactionpatt(A),
    snoop(S, S),                         % S is the remaining input
    { addst(actionarg, 3, s, A, S0, S) }.

% actionarg 4: the object ol? tbe nomirtAl action is *P actiijn^rg 
% Blocking of lL-2 Qfine tranacription by activat«<i rapi . 
actionargtA* —> flnocp<so,60J , is tha input string 

{ \+ ch«cfc*t <actionargr4 }, 
act ion ( S«w, [n,vini5 J .Target, Fieataree> , 

actionarg(Al.) r 
optby*0«nt<A2} , 

{ (member Idaf , Pe^turas} , 
modliatM^J..A2l ,H&as) ; 
mtfcftber (rev, Featut^*) , 
ModllBt ( [AS^AL] , Moda> ;> , 
f raTna {H, action « Targe Mch5s ^ ^ 
ad<3st (*<!t ionarg, 4 , fi , A, 50 , S J 

)■ 



% no more actionarg - save failure
% Records a failure entry (number 0, status f) and then fails via cut-fail.
actionarg(_) --> addst(actionarg, 0, f, _), {!, fail}.

% nounactionpatt is a nominal action pattern which allows for left and right
% modifiers
% Il-2 gene transcription mediated by tcr and cd28 was inhibited by rap1.
% Activated rap1 functions as a negative regulator of tcr and cd28-mediated
% il-2 transcription.

% nounactionpatt is saved in a symbol table (st); check for success/failure 1st

% case where nounactionpatt is in st and has been successful
nounactionpatt(A) --> checkst(nounactionpatt, _, s, A).

% Case where nounactionpatt is in st as a failure.
% The cut followed by fail keeps the later nounactionpatt clauses from being tried.
nounactionpatt(_) --> checkst(nounactionpatt, _, f, _), {!, fail}.

nounactionpatt (P) aiiooqp(SDrSO) , t so ia tba input atritig 
( \+ checks t{nDunacti*npatt /I r_^„r.SD,^) J , 
actlonlmod tL^SynlJ , 
nounact i onuni b ( ^) . 
actioaznodd^, 9yn2) , 
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( tsyni = ved, appei^a in, Ih] , ra) , 
append (l, RA, ; 
6yni ■ vin^, append(R, [Aj , RA> , 
L, = (ftction. Verb, Object ] , 

frarft6(P, action. Verb, MiWisn , 
addet <rto\mactlonpatt . I ^ b , F , dO^ SI } . 
% no more nounactionpatt - save failure
% Records a failure entry (number 0, status f) and then fails via cut-fail.
nounactionpatt(_) --> addst(nounactionpatt, 0, f, _), {!, fail}.

% the central unit of nounactionpatt is a nounactpatt or a process
nounactionunit(A) --> nounactpatt(A).
nounactionunit(A) --> process(A).

% left iDodifiei^a of nounactpatt 
% £vad- inhibited cleavage pf P^l 
«0tiOflliw>d{la,ved) »^st**ieeB (s) , 

a ct ion ( Sflcn , ( , Target , Po^i tu a ) r 
{ frame(L, action. Target, f5D ) ). 

% apoptosia induced c;le!W«^ge of pa 2 
actionlmodCLrVed} procesaC9t, 
Dptdaahr 

action [Seni. [ved] rT<i3f^«t, Features > i 
( traifta{L^ action, Target, [SI V }- 



% apoptoBia cauaing c:l««vag« o£ P8l by Gvad. 
ft heed to invert Che order of noun^ctpatt and ftctlonlTncd 
act ionlcvod (Living) — > proc»&94^ject (A) , ? pr&ceaa or doxmacpatt, 

ftction {Sam, (vlngj , Target , Faa&urea) . 
( franteCli, action. Targe t« a) }. 

actionLffOdC [] {] . 

actlonrmodtRi vedl action [sem, fvedl , Target, Pentpi^eff) • 

byagent (A) , \ may have to add ving to action rwod 
{ framw (Reaction, seen. A) ). 
dcti&nrmod i tJ * .J "> I], 



% actpatt parses a simple action between substances expressed by an active verb

% actpatt is saved in a symbol table (st); check for success/failure 1st

% Case where actpatt is in st and has been successful

actpatt(F) --> checkst(actpatt, _, s, F).

% Case where actpatt is in st as a failure.
% The cut followed by fail keeps the later actpatt clauses from being tried.

actpatt(_) --> checkst(actpatt, _, f, _), {!, fail}.

% actpatt 1: substatxre acts on a\ibatan<5« 
> PDKl phoephorylAtes p7Ca5k at thT229 
actpatt I 

snoop (BO r SD> . ^ 50 is th» it)3?^t etring 
{ \+ chockettactpattjl ..r_.-50,J ) . 
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sew^-jtfhlchreL, % opt 'that' 

prcpopt,. * Added pr«j>opt to ail ow action 'to' and 'with* substance 

5iteinfo(Slte) , 
anooptS, £) . 

modlist ( lAL.A-S^site] .Made J ; 
member t re , Pea t u rea ) , 
raodllstC(A2,M,Sit6] ,MO^S)) . 
f ratne(Pr &Ctit>n^ Targe C^tufods) , 
^ddstUctpattrl fS,F,Sb,S) 

}- 

» S«J5st*n<?« was bound by Substanc* 

I Substance was «issoci«ced to Substance. 

% r can give «ith*r tir^t or a^coxid placv to the eecoxid «I^grumeRt^ 
% a bya^ent crets ft ret poaitioii; prep*gant gets eecorttil. 
^ Pho^phorylated Fyxi vas aa&oclAted vflth Cbl. 

actpatt(F) 

snoop (SO, &0) , % so iB the input iJtting 
{ \4 chfee>:Bt iactpatt^Sr_*_i€0,_) }^ 
aijb3tar>c&&{Al) r 
ae7n_b©t«rm|_> , 

i^cticA vSemclaflflj [ven] ^Tar^wt , Featuree) , 
optbyorprepagent (Pofliti.^,a2) t 
aiioop(S, 3} , 
{ < raeiwJj^r f def , Pe ature a \ , 

{ Poe 1 1 ionsae cond , modi ie b ( t r A£ i Si-t e3 ji Mods ) ; 
Poaiticn= first, nWdXl*C < Sits) , Woden ; 

cneitibBr I rev, F^etusccB] , 

(pcisition-aecond, modlieti tAS^ Air Sice] ,Hods) r 
Poaitlcna first r irxxilistUAl^ A2^Slte] ,Wod») ) } , 
frame tP^ action. Target , Hodaj , 
addst (act pat C , 2^ SD . S) 



% no more actpatt - save failure
% Records a failure entry (number 0, status f) and then fails via cut-fail.
actpatt(_) --> addst(actpatt, 0, f, _), {!, fail}.

% nounactpatt parses a simple action between substances expressed by a nominal
% verb
%
% nounactpatt is saved in a symbol table (st); check for success/failure 1st
% Case where nounactpatt is in st and have been successful
nounactpatt(Fmt) --> checkst(nounactpatt, _, s, Fmt).
% case where nounactpatt is in st as a failure.
% The cut followed by fail keeps the later nounactpatt clauses from being tried.
nounactpatt(_) --> checkst(nounactpatt, _, f, _), {!, fail}.

h nounactpatt i : 

% JciJt phoBphOWlaticai of Bad 

n<>unftotp*tt (F) 

«j:boc^(5Q,SQ> r % SO is tb« ioput ccrlng 
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{ amlDoacidteac ( Al } } . 

ac c i<»n {3wc lase , [i:) ] , ra r^eb , Feat;u re a ) , 

{{mBinberfdee, Pe^tttrea) , 

member (r«v, Featur-aei , 
inodIiBt( tA2,Al,Site) . Hods) 1 , 

ddtdst; fnounactpAtt, L, 9j F,eOr$) 

}■ 

% noyn^^ctpAtt 2 : the binding of ijxibetance and substance 
t aeaociatlon of Pyn an4 . 

V the reason for j^aving this as a separate pattern is t^^ 
prevent ' Pyn and cbl' from belTiig e>firaed together a a auba Canine a 

nourtActpatt rF> 

snoop (SD, SO) , * io the input BtriT\9f 
{ \+ c-heck&t ^Aounactpatt , 2 *__i_/90,_) J, 

action fattach^ [ving,n] ,r«g«t. Features) , 

ofobjeccKAl} , 

andobject (A2) , 
1r 9it:einf<k<sice) , 

( inodliat(CAl.A2r&ike} .Moda) , 
frame IF, action. Target ^Koda) , 
addat (nounactpattrij B.FrSOrS) 
}- 

% Tiounactpatt 3i 

V Th« cleavage of protein by sutoetance. 

k Aaaoclation of ph<>«pnorylatftd Fyn with CW 
I Tyrosine phoipltoacylatioa of Cbl by hinaae 

% opbb^rprepa^ent determinaa tha ori^er of argument a 7 byagent ia placed fifat; 
t erepagent ie .placed aecond 

nounactpatt tP> — > 

snotTpCSO, r V &0 ia the input string 
[ \+ chsG)cat(noiinactpatt,a r„,_,SD,_)J, 
actlonof (F) , " " 

i addsit {nowTJACtpatt , 3 ,a,?,SO,S) }. 

*cCioncf<F> --> 

fliteinfo{Sit©) , 

actLon (SeniclaBs J (ving, 3a3 , Target . Peat\ibre* ) , 
opto f otaj ( Al J , 
optbyorprepagaot (P»Bition.A2| , 

{ (tn«cnbef Mef . Features) « 

[Pofiitioziiaeecond, cnodLidtf (Al, A2,sit:e] ,Modi&) i 
Poaitions f irac , nwdlis t f lA2, Al , site) , Moda) ') ; 
meotiber (rav, Peabur**) , 
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(FosiCion-ffe^ond. mbdllat^ tA2riil , Site) ,V\o^b> ; ' 
Position- iir&tr modi iat { lAl, A2, 9iteJ rMQ<ia> ) i • 
frd^Ae {Fr action . T&v^m^ , Hoda } 

). 

% Fyn association with Cbl.
uounactpatt (P) - - > 

3nc?oplS0,SDj, k $0 is Cha input 5trif^S 
{ checkst (nounactpattr^ ,_,__^SD,_) ), 

eubfitanca9 (Al} . 

action tSemcl A**, ^ving.n] , Targe t,l^*tuT««> . 

{ modi ia t ( lAJ , W , fitt ft3 , Hoda ) , 
framotF^^Ption, Target, Mods) , 
A64%t^ i nounactpatt , 4 , a , F, SO , e) 

}- 

anHinwicidtestUr X \= t«min04Ctd|_] , 

% nounactpatt 5:

% IL-2 gene transcription

% Cbl phosphorylation [by substance or action]
nounactpatc (F) 

anoop tac^^50^ ^ t SD is the input string 
( V-K (?h«cK»t (ivQunactpatt, 5 r_i_jSO,_> }, 

flubatazicea (A2) r 

optdash^ 

action ($«mclaa a , {n] , Target, FMtux^es) , 

I aitelnfo<9ite} , 

anoop iS^B) t 
{ IfflBcnber , Features) , 

modliflt^ CAl>A2,Sit©3 rModa) ; 

member (rev ^PflatuTM) r 

modliat [ Ai^fiiteJ ^Moda) } , 

f r*m« ( F , aet ion i Target , Mode t > 

addat {nounactpatt, 5 ^s,7^;50,S> 

}- 

n<ounact;p«tt 
% fyii-CJOi ae^oclatian, 
ftCrtmaetpat t ( P) - - > 

Bnoop<ad,50> , % SO 'i«..thje input atrif^ 
{ \+ cib«cte«t^flOV»actpatt,6 '_--„iSO,_) ], 
'gu)>fltanc9a (aj.) , 

subetancea (A:^) , 

action CSemclaBS^ [n^vin^J ^Target, features J , 
t Bi t einf o { Sita ) , 

{ nicdliataAl;A2,9it:«],>1CMis), 
f rain« (P, ftctl««^ Target , Koda> , 
9dd*t (nounactpatt , 5 , e « F, SO , S I 
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% C3c?l ^hosphorylat^a by £yn. 
nounactpatt IPl 

enoop<S0^5a} ^ % 50 la th« input string 

Rc(;iOii(Setnclas9, [venl ^Target . Features) , 

subetances ^ 

snoop (S^ • 

\ { (rwTnbartdef , Pe?^tures) , 

V member (rev, Peat ur«s> , 

V mi&dliBt UAl, A2,Sii:&] .Mods)) , 
fr anus (F^ act icuj Target, Mods) r 
addat: (nouM^tpatt, 7 . a^ Fr dO^ G > 

)■ 

% no more nounactpatt - save failure

nounactpatt(^J addat (nounactpAtt, a, f ,_) , {I, fail}. 



ODnnoccact<S«n>,5yn, Targe t,i!^atures() 

action (Sera, Syn, Targat r Feeeu3^ee) , 

(member (Seen, l^auiS^ , cauaei , activate, inactlvata . ffiqnal , eubatiCute , promote') ) \ . 

co7uv«ct««^t« i ^em , syn , Ta r^et , F«4i tor« e ) - - > 

CQime ctact (Sem ^ Syn , 7arge t ^ Feature s ) , 

ir amlnoacid llki^ tyroaine : qx. » tyrosine Cbl phO0p)ioryIatic«n 
% at pipsitloti 201 Tiir 
©iteiiifoCS) — > aTiiinoA<5ld<A) , 

{fi^*ittfi[S,aite, tA) . tJ)} - 

aiteinfoC^I — > 

Biteprapa^ * 'iis', 'at'- 
poaitiontS) . 
Biteinf oU)) U- 
Bit^prftp* --5 pr^ptemiUn,_l . 
sit^rep* --5 prEpterm{at,_'l . 
po«ltton(B) Cposition] , 

e«m_integertermll) , 
{ frame (s^ Bite, I, (] ) } . 



% The definitions of actions refer to the lexicons lexsynact.pl and lexsemact.pl
% Sem is the semantic class, Syn is the syntactic class
% F is the target

% oneaction added for use with moreaction to allow parsing of conjoined
% actions

0«e<lCtioia (activate J gyn, F,P*AtU3fee) •-> aet:ivatet.ermtSyn,F,Fc<itMrEs) ,{S}' 
cneaetdon(attachrSyn,F, PeetureaJ attac}itoTtift<Synj F,Featuree> , { I } , 

oneaction (br«9i)cbondr Syn, F . Features) - - » beeA)cboodtexin <5vn , Features) , { ! } • 



10 



BNSDOCID: <WQ__0(»3687A1TL> 



oneacti<^<iiiactl^atG,syn, P^P^AturaSl i u^irtl vat et* t^m < syn, Feature 5) , {' ] 

oneactiPn(r«act, Syn, ?,Fefttgree) ceacttermtSyn, F, FeatursSJ , ( ' } . 
DDGaction Irtleae^^ eyn^F, Features) releaaeternitSyn.F, pQatureaJ , ( J } . 

onBUctionCBi^aalrSyn,?, Features} signaLcemCsyn. r,Featutfte) . {! }- 
oneactiQn(aui»«tituC6,Syn. Fj Fe«tur«a> --> BubstitutetarniifSyn,?, Pe&tures) , ( I J 

pAfiactiQn(trdnecrlbe,5yn,F, F©atwt«s) tranacribeterrt^tsyn.F. Features) . { ! ) 

oneactioiitptCtttote.Syn.P, Pe^turee) promocetann (Syn>F, Features f , (0 

oneaction ( jene rate , Syn , F , Features ) gene ratct« n« ( fiyn . P r Fe atu res ? . { J | - 

action <aottvB.<;fe^ Syn J F, Features) actlvateterin(8yn/*.l,fflRturee) , 

moreactic^ <Conj , Args? , 

Conj\c [] , Tnct^gemodo ( [ [actioti, AID I ,Argi. Actlona} , 
frame (PI, relation, Conj^/icciona) , P - {Fl] } . 
action fattach, Syn, F, P»atur»i&] att«cthterm<Syii,Ai . Faatureel, 

mc>reacti<jn(Cotii^A)oga) ^ 
{conj = 0 .F -AL? 

Cc?njV»t] r fnergenxMSsi [ CaccioJirAin * Arga^ActiOrtHJ , 
ra'arfte <F1, relation. Con j rActionaJ ^ F= (Pi]). 
action.{hrealci3on6^Syifl^Frr4&tureB) loreakbondtBrmfSyn^F, Featutfea) , 

iroreact ion (Conj ,Arga> j 
(Con? - n f F =Ai; 

Conj merg^modB 1 1 lactioii, AD 3 /Axgs , Acftions) , 
frame tPl . relation , Conj , Atf tionej , P • [Pi 1} * 
action {croatcbodd, Syn, F, Featurea I - - > erea t8bondterm<Syn , F, Features) , 

(noresction { Conj , Arg« ) . 
{conj » CJ,F =iAl? 

Conj \=: 1 1 , rtiergemods i { (ft^^fcion* AL] ] , ftcga, Actions) , 
fr*7n« <Fi^ relation r 0©nj /AGtion*> , F = (Fil). 

ac tion {inactivate, Syn, PrPesit urea) in*ctivatetQrmlSyA,F,FaatviC«a) < 

moreaction fC&nj ^Arga} , 
{Conj a (] ,F -Alt- 
eon j \- I) , roBrgenjcn3« ( I laction , All ] , A.rga ,Acti&nal , 
frame (ri,x«l At ion, Con j, Act tone J , P= [FL] J . 

action (react, Syn, F, Featurea) reactteronCSyn^Ff Featured^) , 

more*ctii?ft(CQnj ,Axgs) , 
(Con5 - fl.F =A1; 

Con.j\3 [1 , mergemtxie ^ [ [actiortrJ^l-] ] . Argft , Actions) , 
frarna [Fl, relation, Cionj . AestionaJ , T - IFI]}, 
action (j*l*a*tr Syn, Features? releaaeterrftfsyn, P^F»»tureH} ^ 

TnDTt*Ptt«n icomj , Ars«> * 
{C*nj - [3,F =rAt; 

co7ij\= tl ; fwreenoiMSa ( I taction, Al] ] ,Ar9*r Actions > , 
frame (Fi, relation J Conj , Actions > , P •< CFLI ) . 
action isignAl, syn, F, Features} signalt^roo (Syn j F, Features) , 

TTu^tf e*ct 1 on ( COJ3 j r A tf a* > I 
{Coni - D.F =A1; 

Canj\=[J , ^noT^ethodH < [[acciOrtrMl 1 , AzgfS , Actiona) , 
frame<Pl, reidtion, Conj .Actional , P- IFI] J . 
actlDn(aubatibutej€yn, F^Featuraa) --2- aubaeitutebftrmfsyn, F^Feetureaj , 

tnoreactiortfConj^ Args) , 

{Coni m (],F.=Alf 

COftJ\-[], niergQinQd4([[actlon.Al]3,ArgB.A<?tlona) . 
tratwe (PI, relation^ Conj ,Actic>n9) . ? = tFll } . 
Action (t^canacribe , ftyn, F, P^eturea > --> ttanscribBtGnTi<Syn, F, f eAtviree) # 
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(conj - [1 , F =A1; 

CODj\-()i n^rgemodi (( {action, All ) .Arg5r Actions > , 
frame (fl-, reiat ion, Con j .Act Lens) . P - IFI] ) . 
action ^promote , 6yn, F«*twra<!) - - E>romoteterni tSyn , FaeitUJf^esJ , 

moreact ion icon j * Args) , 
{conj - f) , P =Alf 

Con j \- [ 1 . meirg«»mci4s ( C [act ion , Al ] ] . ArgS , A^t tons) r 
frame relation, Conj,Kcti0rt3] , F i- [Fi]}, 
action (gen&i^Ate/Syn^F. Feature generateteriolSyn, Fr?**tursBj ^ 

moreaccion (Conj , hz^s) , 
{Conj - U .P =A1; 

CQnj\=(D , m«rgei*ai3s([[accii3n,AilI . Arge^ActioiiB) . 
frama (PI , relatl<^, Conj .Actions) , F - (FLl). 
acciOnfe«u»«. SyiirF^ features) causecermlSyn, Features > . 

miareactioTi (Conj , ^ * 
{Conj t).P -Al: 

Conj \a 1 1 , raergemoda < ( [action* At] \ , Afiga ,Xction« > , 
fr^me (Fl, relation, Coni,Actionj| , F= [PIJ ) • 

% binds, phosphorylates and activates
moreaction (Cpnj r Arge > — > eem^cOA j reat {Conj i ) , 

oneaction [damrSyn, a. Peaturea) , 

m0reaction(Ccnd2, 3Lliat) , 

(ConjS - C] r AlXBt:s[] ,Ci9nj*Conjl. At^e = [ [actiCtn, Al ] ? 
CPAj2 \- I), Conj - conj 2, 
addfnod( [action. A) .Ali.at,Ar5a> }. 

morsactiontC) r (] ,S,S) . 



paa B 1 v>E cpiine ct , [ven ] . Target . Fea t } --> 
sem_beterml^> , 

conxiBctact (een, [ven] ^Targat^Peaturas) , 



proc«adpAtt M - - > disease [h) . 
proceeeeatt (A) - - > procass (A} . 



opttay9rf>tr«!?ageiit < f 1 rst ^ A ? - - > by ^g^n t (A) . 
Optlaycurprepageiit { Becond r A) - - > p r epa^ent [ A > , 
optbyorprepagentUirstrA) --p [] , {A = x) . 

byorpropRgwt (£iyet,A) byagcntiAl . 

byorpir«pag«n.t (second. A> - -> prtpeisent lA) . 

optbyagent (A) byag^nc (A) - 

optbysgant (Al --3 CI, {A - [x] } - 

byageftt < A } - ^ > [by] , 

BiibatancaaCA} . 
by«gent;<A} [by] , 

noujiactionp^ttlAj . ' 
prepagent(A) wibhobject (A) . 

prepagenc < A) - - > toobj « et f A K 
% prapag^nt (A) - - > audob ject (AJ . 
pr apagsnt < A] - - > of ob j e ct [ A K 
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\ '<5ptprep;^9ent [A} - 
Opfc prepagfcsit (A) - - > 

optprepagent (A J - - > 
optpropfr^fent t A I 



-> byagenfcfAl . 

wtthoijj^iCt (A) 
too»5j«C£ ( Al - 
andobject IA> . 
(I. {A= W]. 



ofobJect(AJ lof), 

lLOunacrtiQnpat&tA) . 

ofobleet<fl} [of J . 

£;\i.b^tniic«5 (A) . 
of object iA? loC] , 

actlonof (Ai . 

^tobp^cti (A) [of] ,eubat«nC!e(A) . k to paT«e Binding off Fya and Bad. 

opt of object (A) of Ob j*«t (A) ■ 

optof object ( (kI )• --> [1- . ' 

pxroce3*object{A) process (a) . % can be cxp*fld«d to noun*ccpatt, atCr 



%- optwithcb^cct M 
% c^^twithobjiect (A) 
withob j e ct ( A) - - > 
toob ject (A) - - > 
Rndob j act (A J - - > 
prepobjfict (A) --> 
prepob ject (A) - - > 



withobjBct I A) - 
[3. {A . W}. 

[with] , subfltancQB (A) 

[to], substances (A) . 

[and] « aubstftnofts (A) . 

[to] , substAncea (A) . 

i v/i thl , a ubstance a { A] 



(iptbyar^CAJ [bsrj , 

actionung (A) . 
optbyaxg {h> flubstaneea (A) . 

optby*r^<A) [3. (As ['aubat^nce unknown' l}. 

prepopt [to] . 

prepopt [wtebl - 

prepof-t [by] . 

prepQpt — > [of 1 . 

prepopt - - > ( 1 . 

* toopt 

toopt - [ta] ^ 
toopt --> [I . 

* withopt 

vichapt - -> [witH] . 
wtchopt tl - 



optdaah —> [ ' - ' 3 - 

optdash - - > [ ] . 

O^tof fof J . 

C5ptof [ ] . 

/• optactiDnarg(A> - -> *cti<3tt«r^CAJ . 

optactiona.rg( L] ] 0 - */ 

optaotionaiirg (A) - - > 
«etionarg(A) . 
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It tKfere is no tuTCheu Arguroent 
otf tact ionarg W - - ^ 
I). 

{A - (1 K 

fc 5uto«tanceB<P> substance < F) . 
% fiUbctancBslFJ substance < Pi 1 , 

more^Utaatancfls <Cpnj , PLiet) , 
» ( Conj , 1], Piisst = [D. ? = PI ; 

I:- CODj \=i []. 

\ insrgeaiocia (PI. PlidtrAx^SflK 

% f i:ait*e ( P , rel «t ion - con j , Arg b ) 

* J- 

% s ubB t Anc!e« ( F ) - - > fiubs tance bv± thmods IP). 

> BubetaAcea tA) - - > 

% pri^teiiifi (A) . 

t subs vi thmods . tx t 

% substances is saved in a symbol table (st);
% check for success/failure 1st

% Case where substances is in st and has been successful
jubifcaoceafPnit) -~> checXst tBubst*»c«a, s, Fmt) . 
% Case where substancB is in St aa a failure. 
BubatancBst_> choc)<f t (aubatancesr^^ f r_) r fall). 

snoop (SO , SO] , 
{ \* checX<t (aiibBtancEs, 1. B,_, &0,__) } , 
IfTiijde (Lmjodal , * left modifi^ra 

(afiveralBubatancaa i (relation^ Con j , ?irst jKeet] ) , % conjoined subatancw 
r[rK>as {R[t»Odtf ^ ^ ( righc inodifi«r^ 

* <;re«^te iiftt of llsta cone airing distributed iroda, ot cubatances 

( diBtrlbutaauba (Diet, (Piir^t | Reat} j LTnods, RTno4») , 
% check Lmoda - "no" PI Ot F2 should be changed to no PI en^ AO P2 
f ixi?c<nj fLmc-da, [reL^Conj] , tTel.CS) I , 
\epl±ce{ [Conj.Dist] ,F) 
f rame (F, reiation, C2,Dist) ) ; 
% BUibatanoBS and m^i^i^dertf vithout conjunction 
substance {Di) , 
rntoda (Knhsda) , 

{Di = ^typel, SubEt*npei|ModsDll , 
deleta^ModsDtr [] , VtoABQ2) , 
Append { [Ln)od0r^Rv^e] .»odaD2^ Allrxioa«l) , 
d*a«i:e{Allmodeif [] , Allfnods2>. 
frame (F, Typel , Substance! , Al ImodaZ) ) ) , 
anoop(£«S) ^ 
{addatCaubat«no«t,l,«rP,5QrS) ) . 

/* 3ubiitftO«a<P) snoop^so.SD) , 

(\4 chackac (stibatanCiti?, 2 ,a,_jSO,_)}, 
coniplaxfp) , 

{addst (aubstancfrsc, P> SO, S> } . 

♦/ 

% no more substances - save failure

Bubatax3ces{_) add* t< substances, O, f«_^ , {!, fall]. 
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more substance B (conj ,Pli3t| , 
{ Cotij »^ ri . Piiat a [] , P ■ PL ; 
C«>nJ \= I). 

frame < reiacion , Coji j , ArgsJ 

|. 

% ' X, V, and a' 
nv&r ©sub* tane« « ( O&ft j , Air g« ) - - > 5 ^nt^con j r est ( Coa i 1 ) , 

sur>5Canc€ (PI) , 

{ conja = CI, Pliac = [), Cianj = CortjL, Argc w [Pi] 
C01X?2 \m n ,Conj2\=i /, Conj = Conj3, 
addtftod ( PI , Plist r Argfi) 

)■ 

4f to «jL1ow for BubBtancea wich fftciififira 
njDr©BiLb»tAACee{CDnjl. ArgB> eem^cQnjre&t liConji) , 



% distributesubs

% distributes left mods and right mods over list of findings creating
% list of lists of findings with mods
distr ibutesuba U) j t3 , J : - I 
distxibutesnto* fpiat , l5l ^Tail} , Lmg>i$^ , HmodB) : - 

difttributeBubs<I>iBt2,TAll,unDderRn>c>da} , *diattfibuted for remainder 

Dl s CTypel, Svibfttancel |MadaDtl , 

append ( [LnA0d9^ JUnoda) , ^sods2^1 , MlnnodBl) , 

delttft (Allmadal, [J rAll«ioda2> . 

append(tl>l .Di4(t2,Diet) . % Corcibi*!* findings to get li$e of findisige 

Irrods^A} Btaceterm (?) , 

{frame lA, state, F, " [3 ) ) . 
Ift^xia ( [] ) - - > aem^maasure - 
ImodalH) C] ."^ 

rcnDda([]) t] . 

BCatatarenCV) acclexCstAta, 7> . 

% For past participle of createbond and breakbond actions, the target
% is the word. ex.: phosphorylated, dephosphorylated, methylated
fl tateterw < P) • - > 

SiiOOj^ J «r gat tbe initial string 

Ciraat ehondt erin t Cv*ft ) , _ . _ K 

{SO = t?!.!}. *9«t the'firsc woM of the cferlng 
statet^nafF) - - •> " 

anooptSO.flO) , * get the initial atring 
breakbondtertti < [ven ) , , _ } , 

(SO « lPf.3}. kget the fifSC word of tfte string 
\ nay h»ve t<» add attachtextn t^t 'bound' 
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* Taken from MedLBE gramoi^r to handle »3 cm' 

e«ii(i_niM su r et « arm i Oft i c I , 
{ frairtefN,it*>iurfi, (WrWnitJ , [1) ). 
% complex predicates added November 6, 1999
% CrkL-C3G complex

% ras/raf-1 complexes
% shc-grb2-sos

% P/CAF-p/CIP-CBP/p300-SRC-1 complex

cornfplex(C) proceins(PJ, 

{P = [A.BIJ.A \- i], B \- {]), 
opfccotnpl fixword r 
( freiiNft<C, compLsx, [P}, [] ) }. 

% a complex of NFAT4 with calcineurin

comiplexargift) , 

{frame <Cr coniplex, lAl , { ) ) J . 

complexargiAj [^f ] , proteins (A] . 

coiftplexargiAJ --^ [between]^ prottina lAl . 

% a complex between MyD88, IRAK-2, and the IL-1Rs

i50«iK>lexarg fAj action (contain) , prOMina (A) . 

% Complexes containing BOB.1/OBF.1 and Oct proteins

pToteiAfltP) --> protein <A> , 

{(A\-tlr appand(fAl ePl.PM }, 

nooreproteina (Af ' prot^inconnector^ 
pifOtalna^A) . 

nioraprot«ina ( [J ) - - > I] * 
protelnconnector ('-']- 
proteinconnector L'/'J- 
proteinoocui^ctor - - > L ' ; M - 

ft connector ('^ ^3- taken out not to conflict with reiatlori in 

^ connector --> [andl . tnor^eBUbatancea 

protelnconnectoriC^ [with] . 

opt connector protelncoruiector . 

optconnector -->[!. 

ooinpl6j<word — > ?cDinpl<j<] , 
oc>ma^!lexword [coroplexe^J . 

wmplex-nord [•signaling compleac^s ' 1 - 

optcDmpl Bjcword - - > complexwor-dt . 

qptcomplBJCword --> [] „ 

suK>stmce(Al protein {a) . 
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subBtance^A.! 
B>ubBeance<^l 

BubacanceiA) 
8ubBtance{A| 
substance (ft t 



ceiHA) . 

--> «truGt:u^e(A) - 
domain < A) . 
(A) . 

--5 geneore>FQteln<A) 
--5 arniaoaciaCAl . 

amaIIiiiolacula{A) 

mattar [A} . 
- • > proteins its tA) . 

comp-lexfA) . 



% this will be modified later



proteintercn^P) , 

{fraine [A, protein, [))J. 

coraplex(k) 

GGmplextann<Fl ^ 
{framBtA.C?omplex, P, I3i J . 

cellfal 

specleatA) 

speciBstcxmitP} , 
{frainfr<^. speciea^Pr CI } } • 

acructure iA) - - > 

{ franta < A, structure ^P, U) ) , 



domain (AJ 

(fracne {a: domain^ (]1 }r 

gene (A) - - > 

git!fl«teJf«i(P) , 

(frame (A ^ gene r J*:. [] > } . 



genflorp rot« i rt f A ) - - > 
^temfP) , 

{{X - gene, frainw<Ar g*n«r P. H); 
X = protelA, frameUr protein, P, tl); 

X\= X \- protein, frame iA, ses^eorproteinr Pr CI J ) J . 



afiklma eld (A) - - > 

ainlnaacldtarni<I^) , 
{frainB(A,amlnOACld,F, D)} • 

smallcnole cule (A) - - > 

smallcAOleeuletemCQ) , 

{ frtime (i^, ' email molecule^ , P , I ) ) ) . 



matter (A) 
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{ f r^itie (A, tfUlSfttance . P. t) ) } • 

protQ i US t tfi teafuft ( p j , 

{fraM (A. 'protein Bl.t&'.P. [1)}. 

process (A) 

.{£rarne(A^ process, 
prMfiSB (A) - - > 

proc«s»t^m(P) t 

(frame (A^ process r P, CIJ-S}- 



% terminals 

<icmp Ifijcte rin (F) 
cftl 1 terra (F) 
spec i »st« cm ( Pi 
«fcr«<?tw*«temi (F) 
doTnaintermir] 
gene term (P) 

amijioac i{it.e rm { P) 
Bina L i mol ecui et 6 < F) 

pr ot einaitre t er id ( P) 
di BeaaBterm ( P) 
pr oces s t« rm ( P) 



wCcle^xiprotein.F) . 
acclexioomplex- F) . 

accleJi [species, P| . 
/flcelfex (structure rF) . 
acclexC^ofn^in, F) . 
accicxtgene.F) - 
aeclexfgp, . 
acclex(aminD«cid« F> . 
accl ex ( dtAAllmol ecul a , P } 
ac cf;i «x ( siibst«aice , P } ■ 
acclex(prot«£Aelte, Fl . 
acDlex(dii;«aaejF) . 
aocl«J(<proGeB3, * 



% act ion ^act ivat^a , &^ , F , Pea tures ) - - > . act ivateterm ( Syn , P , P^turas ) . 

a ctivatet erm ( Syn , p , Peaturaa h - - > accrl exes < « ct ivate , syn , P r Fe ature* i - 

j*Ctaclitexcii(svii,rrPeat«rt«> acol*»**< attach, Syn, P, Feacur«eJ . 

br eakboiudterm ( Syn , P , Pe* tvr e a ) - - > accl fl a (br eahbona , $yn , F , Feature a > . 

cr ea t ebo]idt<s rm i Qytk , F , Featuxea ) - - > « cfcIgxbs ( or ««&etond ^ Syn ^ P , Fa a t u r &9 ) . 

inaQ t i v« t e t e rm ( Syn ^ F , Faal;ur«« 1 - - > a cclexss \ioa ctivat e r Syv^ « F , F« Atvir ea ) . 

x^»cttem(Syn«Fr Featuraa} >-> acclexSiS (reacts. Synr Peatur^e) . 

release t ariD [Byn, P^Pftaturea} accI^Tcea (release, eyn, F,Fe«ti^reB) . 

BignaltBrmlflyn-^PrPe^tturaa) acclexBH (signal ^ Syn , F, Feature a) . 

BubBtitut^t^ml^ynrF^FeatureB} -^-> acclexaa (substitute^ Syji, F, Feacurea) . 
trAns«ifiheterm{synrF,F©atur«s> acclexaa (tx^anacribe, £yn,F. fa«tiAr«B) . 

prowotetexTTi ( Syn , F , Pea tur« fl > - - > accl e» e » fpromote , 3yti , F^Ba tvrea ) . 

proceflBtejTn(Syii,P, P*ntureB) --> accl«iCBfl<proGeBs, Syn. Features I . 

generatet6nii<5yn, Pfpeaturea) accle^^BB {generate, Syn, F, Features) . 

cauBQtermtSyn, P,F«aturea) accrIexBd{cauB«,5yni F^FsatureB) , 

^ &«m^i3t cantaina a phrase nhich ie an act i cm 
*<f ticotichk ( Semi is t } : - 

inter a act (fteialisbr C«-ttach, causa , csT^atebond, br^^-kbond , Activate , 

inactivate^ aubatitute-, transcribe « axpv^aa r promote^ ai^nel ] ) - 

% SenliBt <SontaLna a phrase wbicti ia a conna&ter aecieui 
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intersect O^mlist, Icavae. activate, inactivate, HuJbetituta, 



t Genome isectionct ends ber« ^ 

% relations are connected by conjunctions, or
% certain 'conn' prepositions.

% Taken from MedLEE grammar to handle connectives that are conjunctions
% Ex: "severe markings, possibly from tuberculosis"

Beni_relationtF, ID) t relation and «odifiers 

*«m_cQ[ncnapunc . 

eecn^cextainty I [J , C,rel) % 

pxeptsrm <F, conji> , 

>y>lic€ ( [ [xel, PI ,C] . 



* Bk: "marking 9, aweiiing", "markings anc} cvelling** 

sem^relatiop fHr CI ) - Beni_^conj rci (R) , 

% ''dBCksity niay r^^r^eent known tunur" 

^ ■tp^rteingfl, aniS awelling" .1 



eam_jcotiJ rel (PJ - -> 

B«m_ooniiaflipgnc , 

{ frame <F, rel , Con j 

aem^conjrastfConj) % reatrictedi conj, has not sefn_ralaticin_Bbowopt 

setn_coRfnapunc , 
eem^oQnjtami^Cotlj I . 
fc "markingrsT sweJtXiog* 
H©in_conjr«flt<' , ' ) --s* 
9noop(sa,so) ^ 

se m^commapunc r 
Bnoop<flr6) , 
{SO \. S). 

% Treatment of verbs from MedLEE's Grammar
% form of "be"

eem_aiixverblB> flftm_b*t«.rni(B) . 

* fc^rm of "do" 

3ani_auxyerb (B > — > e em^dotar m ( B ) . 
% Corm of *liavB» 

s«m_auxverb(fi) a*ro_tt*v«temi(BK 

aem^recrel - - > pre|>t«Jrtft ( in ^ _) . 
aeni_r*crel - - > prepterm<to, _) . 
% "ift not" 

tf*rtS_auxrfil (V) Bem^aLjxvarb(_l , 

SQm_^negfcerffl<V> . 
% ftiBi_auxr e 1 (V) - - > sanTaiixvfl rb < V > . 

% left modifiers of findings include negation, quantity, certainty, degree, and
% change type modifiers
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tager f wl - - ^ tw] , { integer (w ) > . 
s«in^ integer Itff Lnfce9«^ter^IIiV^ . 

B€Tii^ tini«iini t ( T > - - > b imeun i t te rm tr > . 

% From MedLEE grammar - "lasting 2 days", "for 2 days", "times 2 days"
e!enii_i3uratLon(F) 

5cin _j>reiT>Basurej vabout 
flem__tiii\enie«€ure (T) , 

sem^durctiiMnmod , t o^t . - "in dui?attoii" 
(f rami; iP, duration, [Tl , [] ) > , 

$ein_durprBps — >itiTne*] . 
eem_durpxepd - - > 

prept*rift(f or . 
teni_durptepa - - > [l^fiting , f Of ] . 
3ei!ift_durpr ep b - ' > [ 1 as 1 4 ftg] . 
secn^dvirpreps - - > [laated , f or] . 
9Gni_durp7«pe -->[laflted]. 
^«TA_durAtl.onTnod 

aetn_«pQ9 13 , *opt . - «• • a " 

4!«irt_durattoiiiiiQd --> tin], (Natation] . 
B am^dorat iohmod [ ] . 

Bem^apoatB --> [a] - 

sa!m_apo^t --> [3 - 

% sem_frequency taken from MedLEE's grammar

% "two times", "times two", "two times a/per week", "two times daily"
0 enrt_f ijequency ( F ) - - > 

B em_f reqt erm ( Fi ) , * « once " 

aem._£z^eqbarin(FZ) , % "H day" 

(frame fK.uniCvalr (F1,F2D , [] ) , 
frame (P, f regency , [M] , [J ) } . 

sen!_f rtqu^ncy f F) - - > 

ftem_freqterni(M3 , * 'ql-d", "daily" 
{frame [B, Cr^qvieneyrM, [] ) 1 ■ 

% "2 tini»««, 
sem_faf«^eney [F^ — > 

sem _^reineaBur«r 

aein^qiuintityt«rm(M> , 

{f ratiw [Pr f Jrequencyr [M] * tl J } . 

* 'times 2" 
aem_f requency ( Q ) - - » 
aeni_kint«s, 

a*ifl_^antitvterm (oi> , 
{erame <Qr frequency ^Ol r t IJ } , 
sQtn_f r«quency (f) - - j. 

[q] / aem_quaiiti&yt»nn(Q) r 

seni_tinie«nit [T> , 
{ frame (P, f reflviwiqv, [unitval ^ IQ, T] J , [] ) } . 
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ftM_f requartcy (F) --^ Bom^e ache very, 

sem_t iiwftuni t ^ T > * . 

(frame IP, frequency, [imitv^l, [0,T, every]] , 111 }- 
(^«*^_trc<^^uency <QJ «r "Becoil^' 

sein^ttnieopt . 

(fraiiHe(0,frfi<3ueney,O, [J) } . 
aein_£r«gvi«ttcy ( t3 ^ 5 , 9 > . 
sem^tlmeopt I time! . 

s&njCiiaBopt I] . 

s«m_^*c h« vary - - > [each]. 
sem_eachevery [a vary] . 

Bem times- -> [x) . 



% Taken from MedLEE's grammar

% negation modifier - "no" as in "no cardiomegaly"
aam^negat ion - - > 

% i:begatiO(ii not pr^ssnt; 

% Taken from MedLEE's grammar

% quantity modifier - "two" as in "two masses"
aeni.gueinbityfP) — > 
snoop (SD r 90) , 

{ \4- checKst<*«iii_4ataa.l,ar_.aO,_) ^ % noc a leglfcircate date 
ssm^-naafltityternifQJ , 

«e«i_<3tuantltyrn>od(^) , * "2 or i", "3 to 3" 

(\+ nex-C^wordunitTsoL % rule wt '2 mm* 
franw IpTqoantity, Q, [] ) 
}. 

s«n_quaAttty{ n ,SD,So) . 



aBni_cQiij teriii(C> --> acclex (conj , CS - 

B©ro_dot»tfm(Pl accloxt-vdo^Dt . 

s©m_wi^ittarlt ( r . |s] .s) . 
sem_endiBark ( L r | S] ^ 5) , 

efim_f reqterm<F) acci«x(f raq^?) . 

eem^liaviBta rm (H) --> acc i ex ( vha ve , K > - 

xntegertBT^tX) acclex<iiit^«r, r| . 

seni_^nieaswatitrtn(K) acclexCimib^r^l . 

3Gtn^rtedterm(K) aoclax{tnftd, M) . 

9am]^iie9&enn(N) acclsx {nes r ^) - 

prepeewifP.c) accl4X<p, [P4C3) . 

»«m tlineunitt e rm IT) • - ^ acclex ( t-imeuni c / T) . 
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V ItMg - 5idapti5d ?roni w^dLEE lexicon 
%%^^»>%«r%t%?^l%^l%% CLOSED KORD CATEGORY LBXtCOM 

i -un)cnovm f aiL> . 
•-multif Lle<wdef/J) . 
wdef [cannot . neg , c^ol . 

iMde f (no . neg , no) - 

v^e f [ncine f a«g i no) . 
w<3ef (ricit,neg;iio> . 
wdef (nothing^ nag, no) . 

( ' & ^ ,cQttd , andj . 

viSaf ( ' - * , grarrtnar , ' - ' f . 
vd«f cong r^Jrvi) . 

wde t ( A L though r con j , and ) . 
wdef (and^ con j , and) . 
vdaf <£B , conj , and) . 
vfdef <b«c^xiso , conj , and) . 
wdef (but, cosij , and) * 
wdef < • , • , conj . • r ' ) ^ 
wdef (except , con^ , jn^J . 
^w^ftC ( 4 Jf, grammar, if) . 
vpdfef {rnlnua , con j ^ noj . 
wdef <nor , conj , n«) . 
wdef lor J coo j r or ^ . 
vfdef rchat: , grancnar , that.} . 
wdaf (.though, conj ,atsd) . 
wtSef Ithxu, coo j , and) . 

(ver aes ^ conj , or) - 
wdef (vexBua , conj rOT) , 
wdef (vs, conj ^ or) , 
wde £ C vhe A « gramma r , when ) 
w0e£ (wh«re . grammar j where \ . 
wdef (whereaa, conj^jind) , 
wde f ( If hi ch , gramniar , which ^ . 
wd«f ( whil« r conj , and) . 
wdef (vrho, grairanax, who) . 
wdef (yet, conj , and) - 

wdef (abov«,plOCrateve) . 

MflAe* («bout,B^, CappatoxiEnataly.noonoJ \ . 

wd&f (about ^ pi oc, about] . 

wdef {acroBSrplocjaorOfls) , 

wdef {abut ting, ploCrO^Ar) . 

wdef laccciinpMlev.Pr Cwlth, conn]-) . 

wdftf fACOOtftpanying^p^ [with, conn) ) . 

M&^t (adjacent , ploc, adjacent) . 

wcfeeC [ftdjacent, regionradjaCMt^ . 

wdef (after. p, [afber.coon] > - 

wdef [after . tprep, after) , 

wdef (alonsip, [otl.-RCOAi^j ) . 

wdef (appr03<in\fttely,p, [approxiTnately, noonn] ) . 
wde C ( Around r P r I approximate ly , nconn ] ) . 
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wdaf (AbOP.Pr [on.nconn] ) . 
vd«< ( l^f or« , ploc , be fore ) - 
wdef C before *t prep, be for ©I . 
wdef < behind, pi oc, behind) . 
wd<3f (bolow^ploe, below) . 
wiiie C < be twe en . pi oe , be twe-en > . 
wde f { beyond , ^1 Qc r b^yondl . 
wdef tby^plDC, near) . 
wdef (despite, p, [wich.conn]) . 
v«^«.£fauJ'ing,p, tduring, conn] f . 
ufdef [during, tprep, during J . 
wde.f t-enoaBing.^loe, ftnc<^a£i.rtg^ - 
wdef Uxdendit^e.p, [in.nconn] J . 
wdef I foil owing, p^ (af ter^ coma] ) . 
Side^C (following, t£?rsp, «i£t«r^ . 
wdef (f or,p, [for, ftCdnn] J . 
wdee (froni,>, [ from, conn) ) - 
wd*C(inrP, Ufi.nGonn]) . 
wdef (Including^ p, [v^ith, dOo^^l ) - 
wdaf (into^p, {in.ncfljxn) ) . 
*<^s©f (involving, p. [of.nconn]). 
wiJef (n«xt: , cprep , next | . 
vKSef ( occupying, pr Cirt,flCOnn]) . 
vdaf (on,p, [onrrtconnl i . 

wde £ < over , ploc , ov#t I . 

wdaf (avarlio , ^'L<>^, dver) . 

wd«f ?ov«riied, ploc, over ^ . 

wdef <ovexlleB,ploCrOV«r> . 

wde f { over I y ino[ . ^loe , over ) . 

wd^f (prior, tprep, before) . 

4i>ddf <near,pL0Ginear^ . 

wde f ; radi at ing , ploc , i« t i og) 

wdef (regardiTjg, p, [about. nconn] ) . 

wd«f (ro4)ghly,grairniar , roughly) . k 'Youghly $ mtui' 

wdft^ {gince.p^ [Bincej.conn.] > . 

wdef I Binca, status rsube^e<^ant1 , 

wd©f lthi-ough,pr Citi.nconn] > . 

wdftt (cnrowgi^outjp, Cin.nconnI > . 

wdef itQ^p^ [tOrncannlJ . 

wdef (toward, p, tto,ncortn] \ . 

wdef (towarde.p, [during , connl > . 

wdef (una^rrpic-c, below) . 

wd^f ^uftdemeatii, plocbslDv) * 

wdef t until, t prep, until ) , 

v?de£(up,graimiai-rUp) , 

vdef (upoArp^ [onrnfl<2iin.] ) . 

wd«f ^viA,p, Iwith^coTin] ) . 

wd«f fvitb^p, [with. conn] J . 

wdef (wlthiaipi [la. conn] ) . 

wdef f without , p r [no, connj ) , 

^wdeC (without, neg , no) , 

%%%%%%%%%%%%%%%%%%%%%%%%%% UNITS OF MEASURE %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
wdef r * * . Urtl t , pereenti . 
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wdef (CC . UAit e GC) , 

wdef tcgntlmateif , xiA\t , oro) . 
wdef (c<*nticR<iCerja , unit , cm) , 
wd«f ^c<tt, iinit,cni) , 

wdef {gin « i^iC , gram) ^ 
wdef {gnis, unit , graml . 
wdef iqr^ns, uiiit , gram) . 
>kX^S {gr^me , un i C , S ram ) . 
wdef {)tg, unit , kilogram > . 
wdBf {kilo, unit . kilogratn) . 
Mxdlsf { kilogram, unit. «)cilogram} . 
Mtdef (kilograms , unit , kilosr^tkis) , 
Mdef (Iit«r,unit, Iit«r> . 

wjltf (micro^raiin^unitrinlcrDgrani) , 

wde f (mic rograms , un i(; r <ni orogram \ . 

wdef (iniLIlll&Br.ui!!iierml> . 

wdef (inLllilit;ei'it,unlt,inl) . 

wd<:f (rtiiHigifam. unit ringj . 

wdef (mliaigramarunit.eng) - 

wdef (milliaBconds , xinit , milllBecond^ r 

wd«f (niill Lvol t«r u'lit^^niilllvcit) r 

wde I ( ml , lini C , ml J . 

wdef (millimeter, unit, mm) . 

wdQf {mill irnvt^Y^a, unit ,inn> . 

i*nlc.f < ir*n ^ unl t , ITCT ) . 

wief {oz-e^unit ,ounc«) , 

wdef < par cent, iinitrp«i if c:ent) . 

wd£f fhalf ^Int-eger, 'c?n« hai* ' ) - 

wdef {B©mi, qu^»nti.ty^ »emi> . 

wi»f (ii, lntage.r^ 2> . 

1-4*^ liii , integer , 3 ) , 

wdef (vi, integer , 4 > . 

wdef (v, intBg^r, 51 . 

w^ef l^i , integer , 6 > . 

vdef (vil, Integer, 7) . 

wdef (viii^ integflr, 91- . 

wdef (ix, intcg^er, - 

wd«f (xS.lrifttftger,L2j . 

Wdef (iclii. Integer , 13) - 

wdef (one, integer « I I . 

wdef (two, integer, 2 1 . 

wde f ( doable , qviant i ty , double ^ . 

wde t i three , integer , i ) . 

wdef < four , integer r 4) . 

wde £ ( quadruple r guAOt i ty ^ ovadruple ) . 

wdef (five , intogttrr « 

wdef (Bin, integer, fi) . 

wda£(«idetyri-nteger/60) . 

wd&£<««v6».rinteger J 7} . 

wdef f « ight , integer , B > . 

wdef (nine, integer, 91 . 

wdef {ten, integer, 10) . 

wdef {eleven^ integeJr,ll^ . 

wdef {twelve^ iCLteger,l2} . 
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wdaf CtHlrtc«ii, tnt^^j , 13 ) - 
wtSef ( fourteen, i.iitegfSr,14) - 
w<5e£ (f if bften. integer, 1^) , 
wi^e f ( s ixtft<5ii * integer , i ^ ) , 
wdef (aevesiceen, integer , 17 ^ . 
wdef (ai^btean, irtbcger, LBh . 
wdflf (ninetfi en, integer, 13} . 
wi^ef (twenty. i.nc:egftc , 2{» . 
wdef (thirty, integer-. 3D) . 
wdflf ( forty r integer , 40 J . 
wa«f ( Cif ty, intftSfrt, 50) . 
wdef (fllxty, inceger , SO) . 
wdfiftaevfliity, Integer, 70 J . 
wdef ^flighty. integer, 8D I . 
wijle* ^ninety, int«gcrr 90 J . 
wdef (hundradr integer . 100) . 
wdef (tbousain^l. Integer ^lODO} . 
wd*f ftniliiqn, integer i aOOOOOO) , 
videffbiLlion.inteesr.tiiLlion) . 
wdef {zero J integer, - 
wdef (first , oi«tegfiri 1 J . 
»Qi»f [6«c?Cirt*,Csintegerri) . 
wdef (third (^Dintager, 3> . 
wdef (fourth, oiiit«g*>rr ^) - 
w<3ef (f af thr ointeger , 5) . 
wdef {sixth, Q integer , 6 ? . 
wdef CaevenCh.rOint«g*^, 7] • 
wdaf tcighbl^jOij^teger, B) . 
vd^f (nintlir£>iiiteger,^) - 
vde f ( tenth , ointego r , X 0 ^ - 
wdgf (fla*v<ntnrC»int.eger,ll) . 
wd«f ( twelvt)!^ ointegar r IS) . 
wdef (thirteenth, oint^^tr J 13) - 
wdef ( f our taanth, olnt'^ger . 14 ) . 
wd«f iftc«Aelf),ointeger^ 15^ . 
wd« £ < B ixt eench . o integer > i 6 } . 
wd&f (seventeenth, oint*9«rr 17) - 
wdef (eighteenth, aiAteger, 18) . 
wdef {ninfceenth, ointegar , l$J > 
vfdftt {triple, quantity, trapl*) , 
wdef {cwentiath,oint«9<ir, so) . 
wdef (thirtieth, oirttegeafr 30) - 
vdef l&in^l«^^IXWltitv.i) . 
(ft^titary^^juantityrl? . 

vdef (fratiu«ncy,gr»/wnar, frequency) .*./ 
vdef P . • ,graimmjirr ' , ' > . 
. wdef ( • ; ' , ^ranmiar , ' ? ' ) - 
wdef ('/'.. grarnmax/ ' / ' ) - 
wd€f f ■ : • . granrmar, ' : • ) . 

wdftf < r carta intyr 'twdCJr-ate certainty M . 
wdef i ^ + ' r cextainty r ' high certainty ' ) . 
wdef • ' *,griwronAr, ' " •> - 

• wdef <OttC« , i s:eq, 1) . 
wdef (ciiAee« gramma r«x] . 
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* lexicon vfitzh lexOg containi-ng ccmmon English yoxds Adapted from l^xO 
lexlg from l^xl of HaiftLBB 

% CAROL FRIEDMAN %

% QUEENS COLLEGE, COLUMBIA UNIVERSITY %

• Version ^0 4-01-00 * 
Vsrsiofi 2.0 1-31-96 * 
Version 1.0 1-5-92 * 

% SEMANTIC LEXICON FOR CLINICAL TEXT %

% The lexicon consists of several files: %

% lex0g.pl: single word closed classes

% lex1g.pl: single word - general modifier type words

%

% wdef(word,category,target).

% waxd - is trtie wtiii± of the wor€l being cate^&rized.- 

% category - the semantic category for this word

% target - is the canonical/standard form for the word

% words which are synonyms should be assigned the same

% canonical form.

% multi-word phrases are categorized as follows:

% phrase(word,category,phrase,target).

%

% Semantic Categories

% certainty - "possibly"

% canonical values limited to: moderate - for possible

% high - for high possible

% low - for low possible

% conj - relational operators "and", "or", which connect one finding
% to another finding

% neg - negation "no", "not"

% quant - for quantitative information "many"

: -un3mown f Aill . 

: -enBure_Io«ded ( [napbraaa « loxO^, lexlg, lexaeinACt, X^Ksyn , lex^ubl } . 
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^ -definitiDna K*pt fjroni HedLEB lexicon - lexl.pl 
wdef (ba , vbe , 'hi^H certjiinty ' } . 
wdef (b$€n, vbe, 'high certainty'] . 
wdef tb«ing,vbe, •higli cejrtainty • > . 

CviBB^vbCr 'high certainty') . 
wfSef (iB^vbc, 'high certainty')* 
wdef (were, vbe, 'hl?h ces^ealnty* } . 

wdef <bei5at»e,vc«rt*iTiey, 'high certainty' 1 . 
Wdef <bfeCQnie,v«c«ttainty, 'high certainty 1 . 
wdef (becomes, veer talnty, 'high c^irtalnty') . 
vd« f ^becomi lig , veer t ainty , ^ h igb car t a In ty ' ) . 

put in action lexicon 
wdef {changedr change^ change) . 
wdef I changes r change j change) . 
wij«ff changing, change, channel . 
wdef [neceeBarilyr certainty, ^high certainty' ) . 
wde f (neces sa ry , vr e commend , re commended \ . 
wdef [nQc©s9it*t« , vatatuB r need) . 
wi^ef (tJ«C"citated, VBtatus,need> . 
wdef (neceaaitating^ vct^tus, need) * 
wdef (neceaaitate^, vstatuBjneed> • 
wd*f (rt«ed.,vfltatiie,need) . 
w4e t ( ne eded , vb tatua , need J , 
wde f ( ne edins * ve tatus , ne ed) . 
£ ( nettds , ve tatus , need ) . 

*/ 
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wultLf ile{wdaf /3) . 
! -\m)ciioNn.(_, fail J . 

V Load in program cocnponent^ - Library compoft^jjts ar« part ot Parol Og 
:~ ©nsu^^e^loaotedt {iibrairyii^aaicsK (not) , library (lists) , 

lij3r*ryf reading , library {etriiiga) , lij?rary (ctypes) , library I radtdconflC> , 
libraryCdate) , library < lie tpartaJ , library I sets) , 
x&dv^&c , r adpa . use f ul , uc i 1 . 1 399 i^g , lexl con r 9«TV9]^aln] k . 

initiAll^atlon ruA, 
% run ! - QA^except ion < Br r or . pr ocea b r un , s t dp { E rr or ) 1 . 
ruDtlffw_entryl*t*Jf^c> :- procesBrun. 
mntiir«.%nCrylAboYC> halt. 

« pyocBsa report 
prcvcsaerun piroeesa, halt. 

teetop (Btror) ; - 
% told, 

% write (u^er^error, • firror i ' ) , write (ua«r_«iM?cir , Error > ^ halt . 

% gat user auppllad pAir^met ara and pTO«eaa report 
pjro'^eea ; - 

5et__*r$c (M^«le, rnf tle.Dutf iL*.PrJ5,lJnd€fs, Prot<ieol^ ^ i , 
(sxa?ntyp» ■ [1 r * oiuat hAv4 ft domain 
paroC*«c Unfile, out fil^^Frb.Lftdef sj ? . 

\ O^^A Infile (text input! «nd process 
procea a ( inf il« r 0\> t^i L« , Pjrb ^ Unde £ a ^ : - 

(Xnfllel , Been, s«6<lnfile} r 
on_except ion {Brri^r , 
tea b jgcnocne { Outf He, Pr b r Undef a 1 ^ 

*pp_jerro [__,aucf ila,BrrorJ ) , 
<loaeflleB<DutfilB, Prb.Undefal . 
pt:<^eBflf_,Out£ilBi,_,_) i- 

ipp_^«rr (_ , Out f I i e , ' Program £ail4^ ' ) - 

app_errO (_j Output^ Error] 

cell (Output) r 

writ* t ' r:»rrc>r>' ) , 

write ( 'Frol^ig Error oc^jurrwd; '), 

ftpP_^^^ output, ffrrorj . 
ai?p_ftrrl ( _ , output , error ) - 

tell (Output}, 

vrifc*{^«error> ' ^ , 

writ* {'Error in Input: 

a»p_ey r i _ , Output , Brror ) , 
app_e4rr f _ , output , firror ) ; - 

te in Output) , 

vrita (Error) , write t • </error> ' ) , nl . 

cloBa£il«&(C>utf&le, Err filer Tin file] ; • 
t^ll <Outeile) , told. 
iB«£lle - C); tfllHBrrSil«l , told), 
(imf lie s []; te&KV^eile) , told I . 
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% Argument optione - g^t user defined ATgumenta 

* -p ProbFil* (otherwisQ default is prgblem meaea^dc ar* not written to fil«l 
\ '± Infils (If inpvit la suj^lled by and not «taa4«rd input 

% -s sectloti <die£A\ilt Iq tinpresBiot)) 

% -m HDd« Mefault is relax^ the three choicer; are atrict, relax, skip} 
% -o Out file (if output ebould hm file and not standard output:) 

* -? Provide List daf ault arguments 

4r undefa (^thejrvise tfetAult la - undefined nessagea are not vrittan 
V to a £iie^ 

e«t_arB4! (Mode, inf ile,Outf iLB,Prbf ile,undefs, Protocol! : - 
Unix i ax9s (Ard s I K 
(Args = (] , 4, writeayntax? 
A.rgs • I'?']/!, vrriteayntaxj 
Jkxq^ . [XlKeet] , I , 

aet_arga( [X|lt«6t) , Mode, ln£ lie. Out £il«,Prb£ll€, undaf a. Protocol f I . 

Hrite»ynt« 2- 

write tufler_«rror, *9«neparBer I-m Mode] O, 
nl (u Berber Tor) , 

write (uftW^error^ ' 1-t Outtype] l-p Probfile] l-u undef bJ ' ) , 

nl (\xeer_error) r 

write (uBer^erxor, • t-i infileJ l-o Outf il«] ' > * 

nl (uaer^arrot) , 
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t n^phraBe.pl - contaiti* words/phras^B that are igiior«<i 
no»ein<botft, [both]) . 

Aosesn{s*lec.tiv«lyr laelaictiv«lyl ) - 
nosflmlspecificAlly. [apeeiflcalLy] ) • 
cio*«*i ft he, [th«J ) . 
rtosemla, [a] > . 
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* file raaj^rdb . 5>1 

^ .June 2*^ 199^ 

% fail an unJtnown pr^dicatfe 

:-unkAOwn(^, fail) 
z" op{90Dr ty, [not^ocice]) . % aame priority and &yp« ae \^ 
5- opC^oOr 3<ix, [\=,-=J). « ©me prioTity and type as - or == 

s- dynamic <5«nt no > . 
\sBinVradtpardb.pL 

^parse_B«.C)tence5 <*Be9r -Ptn&, - &ar£e£rror9, -Utidef ineds, -Undents, ^6ect:ian^ 



% 4U3e rMi>de , ^Exanityp« , S entno , Ou t^no , InC^no) 

% Beg i« list of aentences. Fuit is list of t.arget. forniar 

^ P«7seErrorB are a Hat p£ sentences wbich could not parse, 

^ VAdfef Ineda la a list oC imda fined viorda in sentence 

% Una ant a is a ot sentence containing un«le fined wordta 

% SeotioTt the a action of tha examinatiOA, T)ser>1od« la the 

% parsiitg mode spaclfied )>y uaer, 

% SxAimtype is t)i« dOotain (t.ypB of exam> 

% Sentno the number of the starting aentence 

% Outsno i9 the last aentance nuniber + i 

% IncSno is the amount tnat the santenca number $>iauld be increased 
^ ' (i..B. it i« 1 when called hy paTse^aa^t^fl and 0 when In 

^ r*«Ov«ry mode) 

% £ach sentence ia parsed ixidependant Ly . 



par9«_*«nt*Aeea ([].[],[]. O , H ^r_._._> : - *no tftora santeneea 

parse_eentencea (B«or, Pmt Li at. Out fail 7outundef a^ Ou^tmSents, 
Sect ion r Use rMode , Examt ype ^ _ ^ _ . 1 Sno) : - 
5tt_ J ent ance {Beg , s , ftast ) r ^ r 

( iaident.i£iar ^ J, « ignore identifier aorttencea ^ p^ir^a rentain^er 
parse^senteAcaa {Reet , Fmt L , Out tail , Qutiindaf s , QutvuiSent: f r 
Sect ion ^ Ua ar hfode , E^camtype r _ • _ ^ incSno ) t I , 
(outputformlhtext^ , & \" L'-']i !• IncSno \s 0, %D msana in recoves-y 

mode 

Appci%d ( [ [ [sentence r S) ] } , Fmtl , PmtListl ; 
pmcliat = Fmtl 

> 

Inc^no - 0/ J/ % OTJ came eentenc* in recovery mode 
% aentno (Sno) , tl«v&Q^ntno ia Sno IncSno^ 
It ret r act ( a antno f_ H , asae r t [ ssi) tno (NewSentnO I ^ 

»%) r 

% Tnceno - l, writa ( , wrlte^list 3,_} , nl, I, 
% Incano = Op 

pr«proee*« (S^Be.Undef pSenilist^atrict) , % bracket: and check fot undefined^ 

parse_iBodaB (5 , Ba , Somliat , Pint Errors , Undef ^ Unsant^ , Secticxn, write fail, 
Ex«wtyp«rUaerMada, IncSno) , k parse firat sentence 

p*r » e_6 en t fcacea ( Rea t , Fmt2 , M^r e rrors , Horaunde f a > Mor«UnSant a , 

SQctionrt3oerWi5de,Examtype,_,_, IncSno) , % para^ remaining 
append (srrora ^ Morearrors, Oxitfsil) f \ CoRibine teiluras 

tQiitputform(htaxt) , 

(Pmtl \= [], IncSno \= 0, 
I, ay^endf [Fmtl] .FTTiti, PmtiistK % add extra bracket for Utt 
Ft«t2 • [] , PiRtliat m F^fttl . ! 
) 
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append (Fmtl , Tn\t.2 . Fmtlist > 
) ^ \ Combine tar^^t^ 

append {Unaflnt^rMoreunSQnt a, Out > , 4r Cofftbine santencM 
sppfend(tJrtd*f,MOraTindefe,Outunaefs) « Cocnbine un<iefined worda 

) . 

%pai:3e_mDdeB(<-B,+Ba,+S&Tnliet, -FTOC, * Pa i Lures , 4Urtdfif, -imeenda, ^Secciofl^ 
\ 4Mri t fiMe* fiAge , +Examtype t +Ptode , i nc Stio ) 

% Si* original senccncer Be is eent^jTiee after l«3tical lw>kup 

% SetnliBt LB li«t <>t E&mantic categi^Jfies in iJetttence 

% Fmt is foziA^bted output , 

% Failure* i© list of sentence /fragment 9 which could not tee parsed. 

%' llhd«f words not in lexicon, Unaenta avs swteneea containin-g 

I und«fin«<& words 

section is Aame o£ sactiim )>4ing processed 
^ Writ«McsS&ge is meBsage returned from doresult lift case aoreault fails) 

^ BxAmcyp^ Is domain r ^tode is user *|?eeif led K)Ode 

% IncSiiQ Is 0 if ti^ift te a frsigrm^rtt of a aartt«nce th«it n^as «lr»ady 

% pureed - but unsuc-^^eafiilly; i« 1 If this 4a a new *en.tesice 

t Best possible - cry to get" the moat accurate pars© poeatble trying 
% all ftitet'ttative strategic in turn if KJee-eeaaary 
% All worda in eeiiit-«AC^e are defined 

parBe_fnDdeB(S.B*,5^wliat,PinC,SjfrOM, 13 . 11 rSection.no, fiSxaiPitype.'Pnicid^, 
" Inc) 

(Pmc-de = bpaog, Prttodemod = modea , li lip recovery mo^^ 
Pmoda = hpic^2, Pmodamod a mi5dfi2, \ 
P[rr:^d<5 - bpeeg3 , Pnuxiwood « mode^ , I i 

PiTPiide * bpekip, ?mo<Seiftod = modw4r ! r recovery nodB 

% in U3«r j&f>ftcifi«d parae ir>oi4e - don't p^rae in iDod« S or ?c*ywQS?d 
Pniod* \- keyword, Pmode \.» modes* 
l^itodeiTiQd B nodel 
), 

dosent C9,Bs.Seiftl±st,Fmtl,M«a9*g6,SectLCtti,_,E&arntyp»,PtftQdemo<i,_> . J* * 
strict fira^ 

rfecovfiry(_.a,Ba,S#<nXtet,Piiit2,rtB&*Age-ErroxB, 13 , 13 .Section. 

Pmode^Escaintyps,^) , * try alternative modes a* neccy 
(outputf oriwthtext) , inc \a"o, 3 / appana( [ [ [aentenc^rSl 1 ,tmt\, Ptntal ,fmtJ r 
4ippend ( Fmt i , FtntS r Fmb \ 

i . 

^ alternative &trat4ifi«« if have. undeCined words 

parse Tnodes {B,BSr eecttiiat , Fmt , Brror* , Uhdef , tJn««ttt* , Section, nO, Examtype ^ 
PTTiode,lnG) 
Uni*e£ \* E] . 

recavery<^r Sf^ Sa. fl«mii«t , Fmti , yes , Bvrora , Ond»f , UaaexitB , Section. 

Prtode , Ex.aitptype , ^) , * try altern^tivee if h»ve undafineda 
^outputf ortiiihteatt}^, lnc\a 4 , append I ( (Mntence, 33 1 , Fmtl , Protl ; 
t?mt i» Fmtl 
3 , 

% Hey word strategy is fact test but leatt reliabla-,- 

e*rae__modee <3 , Bs . Bcmt i*t , ftnt , Errors , Ufldfrf . Unssnts , Section , no , Exanicype , 
"" Pinod*, IttcJ : - 

(Pmods » Keyword,' Pfnoda s modes 
i piciode » itK^esI , 

recovery (5, semi ia Pmt i, ye a ^ Errors, TJnd.efjUnsanta* sect ion, Pmode, 
E^aratype^ _) , 

Coucputf orm (htext J , loe \« 0 , i , apipend ( L leentence , S] ] , Fmtl , suit) ; 
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Pmtl * Pint 
1 . 

\ PaTBing/Reocyvery modes 

t pa r SQ_<ttode b ( ♦Irt ve I , +s . 4 as , +sero , - Pmt . 4 Pa i 1 ed , 4Uftiae f , 4-Utt a ent^ , +5« c t Ion ^ 



4-5iDod« , +Bxamt ype , _ ) 
% liCvel iB the recovery level"of tha predicate 
t s la the original £aot;«nc6 llat 
k Bs is the 

& Seen iB the ^Ist of semaAtic <3«^egori€9 in t)i« eentenca 
% Froc is the fornuittcd output, for the sentence 

% Failed la <v«4' iC ehe paree waa unsuccesaful, an^ 'no* otberwiae 
& Clndef is a list of worda In sentence v/hich »ra undefined f not in lexicon) 
% UnsenCB the lists of sesitencas/BegDn^Ota which co^jild not b» ^raed. 
% Section ie the sec&ion of the repor& 

« ' Pnvode is the v«*t^ specified parse iood« 
^ Bxamtypa the domain 



% mode 1 la the a trie tea c pardittig mode - th* paraer aucc««ded for the compleCe 
^ original s«nte;)C^et tiding the grARcnarp all worde in origiMl Bentance 

% are ^eCineii in lexicon 

% cnodo I - alt«Amative not needed becauae parse aucceedad 

r^covfety (1, tl ,110, [] ,Undi»f,anBenta, I. 

% - no alternative eerategy allowed in rrode a 

4r in ^ajSe where there «re no undefined^, Moparse la 

recovery (L, 11 ,yea,S, O , H Pitode r s- 

Pfttode = strict; Pfliode « model, I , 
)t in C9k&« there are undef ^^r^edS/ unsenta is s 

recovexy (lrS,_,_, [] ^yes.Nopaar^e.Ucid^f ^Unsentar_* P««ode,^,_] :- 

(PTftode - atrictj Pmode - 'model'), 

undef \= 11, Vji««nta - >fop«r*« • 13, J. 
recovery (l,S,_,Seiftliat, [] ^ yea , S : - 

% s^rttert<5e contains no relev, Inf ormacionr don't try to recover 
1 \+ (sobtypsff indin^r fi«wiliBt) ; subtype (time <,Saoili<t> ) t I- 

\+ actlonchJs (Gerttliftt) - S april 2^, restored 

^ mode 4 - *iiip undefined words «nd try to parse according to mode 1 
tocov^iry (4 ,s,_,_, FmtryeB,Bxi'orfl,CJndef , CJ ,e«ct , Pmode^Bxamtype.^^ 1- 
UJidBf V= tl . 

(toode 9 l9£>r' Pvnode = inDde4r 

Ftnode - bpBeg; Pnrode « bpakip; ^ode - mode 4 
) . 

prepr ocesa (&,ha,_f Semi 1 a t ^ bpaki p) , 

doseat (S,Be,SeiiaiBtrPnit:l,Me9taage,SQct._rB3taintypB,cnod«4 4_) , 1 1 
rftC0v«ryf_,Ba.BarSanili9e, Pmt2,MeBsag*,Errorej [1 , [] ,5ectr 

bp«kip. Bxamtype, dent oof . t try altematlvafi If neccy 
apiiend I Pnt 1 , Fint2 , Pint ) . 

mode 3 - try longeat parsed segi^ent; partition rest of 
t aentence uaing- mode 5 for parse ntodQ bp 

r ecove ry J 3 , s , Bb ^ _ , Fmt , yea , Sr rare ^ Unde f , una« n t b , Se ct , ^od« , Examtype , _) ; - 
allowable nodes tor- chooalng longest aegineot 
{PiDD^e = bp; Pchode m bpakip; 
PoEKide - ftl^lpr Pnaode =. mode 3; Pmoda = inod«4; 
Prnode - (»peeg3; Pmode = bp^eg 
> . 

{^rnode » bpakip, ^mjOdOinOd - fnode4_3; 
. EntodecTkod = mode 3 
) r 

cliBc3cBt (aem _pat tern. _j a « Target, Be. Rea&^ j tcbecX ayfOAol table 
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^dooreaua^t (Target , PmtL , BKimtyp^ , Sflct , P«odenio6,_) i 
forma t r eauj t (Targ« t ^ Pmode im>d , Fmt i > , 
{Pmod* = n»d»3r Fwtlist «> (1. EYrora = Rest; 
re CO V4 ry { 5 K t ^ R« st r _ ^ Fn^tl i b t , ye s ^ Error* , Ursde f , Wb ant s , Sec t , 
Pmoiie , ExamtypB ^ _ j 

append (Ftttta, Ptrtlist, pvnt) . 

* n>ode 2 $.egraentfl senfctrto^ OElng word barrier nwchCtAS- This mode ia tried if 
^ parse faiL-ed £or origii^al sentence /o^ there ar« undBfined words 

^ segonant aerttenc^ u^lng word JMn'rl^rs 

recovery (2,3 _i Fmt , ye* , errors , und*^ , Unaencs , sect , Pmode . Bx»nicyp« * : - 
(Vmode • bp: Pnv^de a bps>:ipr Pmodfi = niode2; Pnvode • skip; 
Fmode s modeZr Fmodv a mocSel; Fcnodfi « mjQda4 i 
Ptnoda = bpaegr PmiooKe ^ bpsQ^aj 

Ir 

*(«giiiientandparsfl (S, Pmt, Error 3^Uns»nt«^Sect^Einode,ExamCyp*,_) , ! . 
% mode s - try to partition sentences by findings 
% vhen a flndiAior i^^ sentence la fouMr g<» left until first 
% niodifisr found (if 2 find Inge are next to each oth^r, 2nd an« 
t conaiderad the findit)^ and let i£ cocteldered t))e modifier} 

% Repeat searching tot succeesive fl2i<tinga uain^ t-bla method 
recQveryiS, {] r n [] [J ._._.^r_._, J 
recovery 5 , aa , Fmt , yea r BrTOr* , ondef , utiaeritsif , Sect ^ 
PtooiSe , fipcaimfcype r _) ; - 

(PiDDdtt a bp; Pmode = t}ps)c3ipr ^mode = bpse^r Pmode =» keycnode; 
PcfiOde - modes,- Pmode » A^gmode 

) . 

p-reprocos3 (S,&si,_,_rbpakipl , * akip xind* fined wordft 

attionfindiugaegtBsL , P*eg, Before) r I . % get aegiMrtt CDntainiti^ finding 
(Feeg » [] , Error* - S, i ; * no fiftding to segment 
^Before a C] r Errora = Ba, FtfttJ. * [] , \ t * thia part «aa tried 
pr*]proceaffiFaeg,Bseg,_rSemiiatjbpskip) , 
^oaent { Faeg , Bseg « SenCL i. at , Pint l , rsaasd^^ , £ ect r ^ « Sx^mtype r 
7node£,_l * tjcy to parae finding ae^ment 

(B«£ofe - Uf Beforel a [1, Meaeage ^ 1i tici eegmenting yet - 

«Kip b«g. 

Kasaaga s yed, Sefe^sc^ei = Before, %dian't a.dd have to sHip 

Rkore 

Append (Before, [' . ' ] rS^forai) 

) . 

[ Faeg = [] , Pmt i nO finding left in sent. - doa^t recover 

r eco ver rea t <F» * g r _^ Before 1 , PtntS , Hea a age , Br rora , 
t , Hewmoda , BatAtMt ype , _) , 

* recover remainder 

append { Pint L , Fist 2 , f\nt ) 
f . 

* nothing could b« recc^vered,- all input -> Brrora ? Tonnat is U 
recovery <_,S«nt«,_r_, C] ,yee,Sent3, Vndef- [] #«._._r„) . 

% pAtt of phrase vaa akipped, add period «nd Created skipped pert aa a 

% recoverxeat (-t-Segnanc , +5emX&ot r -^Bef ore . -Fmt , fMeaaaga ^ -Failures^ ^section/ 
t +NDde f +BxaTntyp* , 

^ segment p4krt of aetitence witK a finding 
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t Semliat ie a liet of sctnantlc caitceoriea for that s^ttt^nce part 

V Before is the part of sentence l^&fore ScgwenC 

\ ^mb tfl the format f<?r this segment 

% Message ±s <tio' ie thar© is no aegsTtsmtic Infornv^iCion to lt>« recovered 
1- MeEea^^ is 'yes' othervi&e 

> PAiluraa are ilgta of 5e^nvent{e> that could dot be jvatsed Bucceflfafully 

k 6 action is section being processed, Modo is user sp^crified partying mod© 



recoveryeet (_j_rB«fpr«, [) ,no,BEfoJBl,_,_,_r_) 

(Before s []. Beforei % ADthing was skipped 

«kpp«7id ore , Bef oral ^ 

!. 

% nothing Left recovi^r.' write pbr«fr9 th«b was s)cipped 
recoverrestC t] ,_,B$foi:e^ [] , yea, Befot^el, 

{Befor* - 11- Beforel = ll , ! 

appM6 <Bef ore, B«£orel ) % 

), I. 

4r can recover partial paree 

recove rr as t < B» , _ , Be fore . Fmt , ye« , errors , Sect , Pniod^ . Examtyp* , _) j - 

tfrhecksfc(sani_p*tt«tnr_. B, Target, B¥rR«fltseg>^ % recover froffl cymbol tab. 
Irda resul t tTarge c , Fmtl . Bxamtype , 5e ct , modQ S,_) , 

forma tsreault (Target,niod«5,Fh)tX} , 
recurve ry { 5 , Re a t seg , R« » t j _ , Fm t i ^ yes , Brr c>r2 j 

[J . [] /S<ct, Pmode,«?caintyp«r_) * 
append (F^tl. F>«t3 ^ Fmt) r 

(Before ■ [] r Srr-ors - Error3^ \; Vfiothing alci^ped to add to 
append iBefors, , ' (Error 2] rBrrorcl 
> . 

* carniob rci?pvftr partial para* - *>ctp firat eiemsnt and x^try 

* if let e Lament is neg*ti<>n a-emantic type, ekip 2nd ^Lftment instead 
I; Handles cass >rK^re 1st element is a negation, certainty or atatud 

add 2i\^ fclefftftnt to unparscd sentence a list {enloosed la angle br^CH^ta^ . 
recover rea t ( [X , Y | Rea t seg] , _ , fore L , Fmt , y«s ^ Br r <^rs , 
Scc^t^Pmode,. Bxaintype.^J s- 
foundword fx r Semi, Tar) , 

r member iSeinl, [nag, c*Tt>inty,vcertaiElty, vcctnn, statwSt, vetatus] J r 

semi 3 p. Tar = ccmn] 

) . 

\(Hod - negr Mod =^ certainty: Mod = atatusr Mod = vcertainty) , * leave 

this mod 

pTeproc©B3i[X|R«St9«gJ .FsegO,_,_rbe^flkip) , % sJsip undafioed words 
f indinga«g<PCftgOrF3figjBefore2l , !, % gat fiddlpg eag 
{P««g - Crrora « [X,Y|R«6esegl , Fmt = tl >" * no finding 

preprocesa (Feeg,Hae5,_,K*»t9erri,bpakipl r * aklp undefined words 
doaent ( Faeg , Sa^g , R»s t a em , Fmt 1 , Mcc 3 age , Sect , ^ r Bx*mtype , 

inodeS»_J, * try to parse finding s^^ment 
rBoov©rre&t(P«es^_r (YjBeforeS) , FTBt2 , Hfisaago , Br jrorS , 

Seet,neglIlodB,aKartltypeJ__^ , % recover raraairtder 
(Before! - [] / Errora - Srror2, Tj 
append {Bef orel , [ . | Ert03::^2] .Errors } 
) . 

append (Ftntl , Fmt 2 , Fmb > 
> . 

% aXip let eleement.; «ncIoae it in brai::KetB 
recoverraatf [X|Re«e8eg] Bef orel, Fmt ^ yes. Errora, 
sect.^BnkDdavBxamtYpe,_) 
pif>eproGeBB<ReBtaBg^ PaC^O,^,^, bpahip) , 
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f ij:idin5Sfig(P3egO,Fs<g,EQfotr«3> , i, % get finding scg 
append <BBf oral, lX|»e£or«2) .Before) . 

CFs«? - [), sryors = (X^Keateeg] , Ptnt « (] ; no finding 
pr«prQcesB (Fseg,Baeg^_/RQ«tBeiT»,bpakip> ^ 
dos^n t f Pe«g , Baeg , Ka c t seen , Ftttc l , Mes , $ec t , _ , ExA^type , 

nvscaoS,^), % try to fitiding aegcwnt 

r ec<>ver rest (Fa Before , Ptftt 2 , M^saage. Error* , 

Sect , Me wmodfl r Examtype , _ J , * r tcove r rema ittd* r 
append tFmci ^ Ftnt2 . Ftnt) 
) . 



* no eemantic irbformatlon left/ return Bs^rors 
r^cDverreetl CX|Reat:3eg] , (]; Before I, Fmt, yes* IXjKeataeg] « 

^dDsent. (45, -^SeTnllBt, -Fmitlier, ^M«tSBage,4-5ection, +ffrLtB)1escai9e, 4exacpty;p^e, 
If +M<?dc) 

% & Lb original liat cif wcrda in s«nt«nae; Bb is Li^ti aft«r lexical Lookup 

\ SeinLlat ia liOC o£ s«irantic ?4it.egorlea covrftSpcnding to Ba 

I Prntllac 1« 11 at: of taxgat fe^ma for sentence 

% Mesaagfe la 'yes* if the output froio paraer algiiftl4 a failure r 

% and 'AO' ot3i«rtariBe 

% dection iv paction of examinAti*^ being processed 

* WrifceWesaage eignala whefctiei? an error occurred in generating t^rgfet form 
\ Bxamtype ia cha dOAUtin, and ^3od« i* the user specified mode of parsing 

% Farae sentence and jracurnB target ift neated forntftC 

% Handles ca$e vbera aentence should be skipped ia^cauae inf« ia about 

^ family tnemljer or peripher«i to patiant 

doaent (s,_,SQtnlist'r n • Ertf<^5tr_,_^ 5- 

aJclpaaTitanc* f ^^mlist , Errorl ^ ^ . 
doaent (fi.fla. SeEfiliHt.F^tliat^BrroJfrneg.deotiQtt. wrlcefail,ax«1«type>^Sc>de,_^ : - 
atkemptparae(FrSa, sarttence, Semi iat^ diction , At otal) r 
( P = Cfa£iwre3, Errormag = yea, Bi-itefail = no, * ^ parse fallura 

P - [1 ^ Errormag = no. wsrlcafail = no, Frntllac = [] , 1 % ampty. target 

? 

%dDr«sult{P,FtatllBt,Examtypa. Section^ Mode . 
format result (P , Mode , TmtLiat f , 
Errormag = no, KrLt».€*.il - no, 1 

Srrpmug - yea« writefail = ye4r 1 

) . 

»parae_sentanceBtB«>gjBeg, U # CI j - ■ 

% attaniptpftrae {-P , +aa , +structure , 4 Semi tat , -Ftype, -TOtai) 
% P la output from parser 

% Ba ia list of wofde in sentence aft^r lexical Ioo)tup 

t structure is nairte of structure to be parsed 

* Serolist i» I Hat of aemantic categories corresponding to eleioantfi In Ba 
I: TotAl ia nuihher of timaB parser reached settt_Bent in grammar ; 

i where aem^Bent is "higheat level predicate in granuwAtf 

% don't parae if sentence consists ot only or 'r' 
atteiwptparBet [i jBSj # r^#_) 
Ba - Bb -^Tv-r^) . 

9r if a teinpl4ite exists far vhola aentert^e, gat parae from, it 
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actQmptp»rB« (P.Bs, sentence, : - 

m [x.' .'K is_liattx) , i^tiw whole aEtitence is o finding 
f lnd_s«r«_sanfc ( P « X K ! . 

\ parafes and rstraecs weLliorcnea string tabl« - pareee sent^nca 
att omptjaree IP, r Sentence, Seoiliac , Ptyp» , ACOtal ) : - 

rfttractall j , 

ratractall (addatotal , 

&em_Bent4PrSeinIist,AtotaL, Bs, [1 > « ! . 

\ par$«S and retracts viellf<>rm^d stxlnd table - par«64; bodypart only 
Ak^em£^t:parBe (P, Bs,bi>dypart j- 

seiRjbodyloc IP- Be . O ) , 

retractallfwiat <_,_.^^_, \, l. 

fcaegmentandparse l-»Santences, -Fmtl^$fc, - Pal Lure a, -uneentj tSscition^ -i^Nb^e^ 



% 4-Examt vp« r + Sefttnol 

% Se^tenceB la list of sentence s«^&nt8. 

t nntliat consiats Of the formal &te4 output for the sflgnianta 

t Pallurv^ is the 11 at of uApal^aed sagmant^- 

% Unsent is the lia^ c>f Jp^gmencs with undefined words. 

^ Section ia the b act ion being pxoc«ased, K)od« is the u£;er specif led nu^de 

% ExaiTttype is the domain and Gtntno is th« ^etitencQ id. 



segmentandparseC t] r 11 r n * f] ._^_r_._l -- !■ 
sagmantartdparse (sentences , Pmtlist . Failures , ynSent , Section, Mode, 
Bxamtype , sentno) i - 
get^aentajic* (S«nt^ceB,5r RasstK 5* ^santcnce to Bcgroent 
prspx^cftaafs^si .^rSamlist ,Mode) , i , 
(Jlode * EiKsdeS, rtewPrtiode - bpBBg2r 
Hode = niod«^ , NeiAiFmode = bpsedBr 
NewPmode ^ bpeeg 
1 . 

( aeginentl (Sl^fiega^ n r ^39) / li 

par B tM ence a ( Segs , Ftncl , P^i I e , _ , CInl , $«et i on r KvwPmodie , Sx Amt yp« ^ 
Sentno . Sentno ,0\ , \ 
; aegmenta <S1, 0eg« ^ [] rfieg^ i 

pa r©Q_Bent«nc« e f Stga , Fmt l . P* i 1 a , _ , Unl .Sect ion , NawProode ^ Examt ws , 
San tni;^ , S entno r C ) , I 
f *egtftent3 {sirSegs^ [S .JTegatatua^sag^ , ! , 

parae^aantanc«« fGega r ^nitl , Fails UnL , Section, KewPmoda , Exaintyp* , 
" Sentno,SeAtnia*0) , I 

t SAiJLa if cannot se^^nt sentence,' otherwiaa assents remainder 
segmenrandparsa CRadt^ Pmt2 , Hex terrors , Next mis , &QOtion^Mod«« 

Bxatntype , Santno > , 
appandfTintlj ^i!2,£:intli8t) r 
append (in^l , HexttniB, OnSent \ , 
append i Falla , dext 9r TOr« , failure a > ^ I . 

K 0 fegmentl U s , S eg b , ^Be^ r +UeBaage ) 



\ S is Lidt of vfordB in sentence 

% Saga consists of aentance a«^ent.s as sap«irate sant^nces 

Baig if li»t of words in seAtenca pri.or to the currant portion of santatic 
% \^%%^%% ift 'eeg' if aagroet^ting aucca«4«d and ^noeag' otherwiee 



Begniwntini , I] r_,noBeg> s- \. 
% sagcnent sentence at connect phrase /word oy at moat conjunctions' 
% if negation precadad, restore negation 
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\+ sfefti^^-ndinarkdiest, * d<?n't asgment if At end aiJ-rfeady 

f pundword^X^fiflTn^'Target) r ^ get stmanCic elaeaif ication «ftd target 
< X - nor, Append ( [no KR*4;t, Rem) » ok to segment «t nor 
fX = without r «pp«iid l[A0l .Rest.Ram) \ oft to ae^trtant at without 

• scm ^ neg. Reat - [Next |f^est2i , t have nj^a^tion.- tc5t vM>rd aiter 
f ??urtdword (Next. Sem2. Targets 1 , % for connective - add bftek negation 
teat£orcDnn(Kext, Sem2rT«rgeC2) , Recn - [X|Re3t2^ 
j te^tf orcc?nnCK, Scro.T^cgfrt) . Rest » Rem 
V . 

aegmentl aX^Rescl J U|Hewreat] ^St^^x^e^seg^ 

4ipp«3i,d {starts iXD^&Bg) , % part before sagnwiatatioii 
efigmenti (K«st, V^nrrest . Beg, Seg) • 

ceacforco]m(K,S&ni.TajDge&t 

i S«m - Target » lP,connl,&\« with * sekgwent at cojjnfectiva prep 
; member C Sam, [vcoawi.vahovj ) * segment at tt5eae type* of verb* 
; s«Tn = ceai^ \+ member IX, Land, or , % ' , ' / ' .aaD ) 

] . 

^ scitgrfttnt at certain wori^C - 
aegTTient2 n] . [3 . [] .noaeg) :- I- 

5cgreent2 (s, Sega , [ ] , seg) s - 

crtdmark<R©Bt , [J? , ? . 
BB^iTtetitS ( |Reat] , [X|Kevr«j;t] , [] .SegJ : - 

aegrnentS (R.ost, Wewi^eat^ 1] ^Seg) . 
Beg2([x|Reacj .Rcat, . ' , '«t&os>^ jRem] ^ 

rft^mi^e r ( K ^ [whi ch , th*t , unti L , whex« , when , whi Is , wJx* , 
' C ' i ' 1 ' . between, whereby^ after . UeCore , prior, 
greater , rAnging] ) , 
Rem n Rest, ! . 

segments rto^eg) t- !. . 

* segment at conjunction - if negation prec»aedl conjunct ion, add 

aegmexit3((X|Rest] ^RemrBeg^M^e^tftatua.aeg) :- 

\+ aem_endirvar>c(Restj * aic«ady at end *f Hent^nee 

Beg3 ( CX|Rast] , s«m/Beg,HBgstdtU4, aeg} , I . 

aegJ (LX|Re«e) ,Rftnfi,Beg,wegotattts,*egJ :- 
wdef (x.conj , 
member (X, [and,i>r, ' r ' 3 ) , 

tnonvartW^gjtfttua) , Rem = ( ' . ' ,Neg3tattt*|HefltT, I % restore n^ac ion 
f R4m - I ■ - ' , '<eos>' iRMtD . i 

) , 

sag3< CX|Refft] , C;t, • . • , • ] Reat J scg) : - 

£oimdwDrd i% , a9«> ^ ! . 

ae.giitX|Rest] , | txewr>sat J , start rWfc^ statue, S eg J 

( nonver(M^atatus) r l; t l^t neg already found - continue aegmentipg 
f ou»^wi^vd{x^ senir Target } r . 

( Target - no^ Neg Statue = i : 
Sam s neg, H^gatatua = 1 > 
Sam \w t»eg. Target \= no, ! 

) f 
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true, i % word 1b uMe fined 
). 

append [start, tx] , Beg) , % part before aegnventacion 
aagrffient3 <ReBt.Kfewt$Et,Beg,Negstatas, 6*gi . * • 

1 for findting type claaeea - pare« fr-S a a^ntence 
wha btoparse 15 em , ^ , Sent ) : - 

iMrtiiber (Sem, [cf inding^^pfi nding,n»rph,dis««is ft ^device, proc,«proc, descriptor) ) , 
attBTnptpar^e tP, Sent, aentBnce, [Sew? , Lropre€sicin,_) . 

%- fcrr bodyloc clasBBS - parse & bodyloc ind>dlfier 
wha ttoparae t Sem , F , 5 snt I ; - 

mecnber ( &on> , ( bodyloc . regl on , s ide , poa i t ion J ) , 

*t t empt^aY B e (P , Sent , bodypert *_t_,J^ . 
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t file rAdr&c.pi 

% 5«pteTnber tj 199S' 

t fall an unJcnown predicate 
3 -unknown fail) ■ 

DP (^00, ~Cy, t\+*not,oncflJ) , * sa™^ priority and type as 

opl^DO, xfxr CN-^-sJ). t same priority and typo - or 

J- dvnainierCdocftain/lJ . V domain twinj proc«S6*d 

■ - dynamic < output *£irm/l) . * form of output {n««ded to distinguish 

% mar)cup Of text from formatting forms 

dynamic icurirentseDC /l } . * a act ion for output ting x^^aulta' 

test genoin«fOutfilQ,BrrfiLfl,Unf 

"gec_±nputs«nt* 1 13 .ToVli^t^ , i, 1 rB*e3 ift and tokenism Input 
^Tokliat [1, J, i error condition 
^^^^♦^{^^Oucf ILe, ' tfo input sent'), 

par se_seffitartc*s ("Tok 1 let, Frht 1 ia t , Pa i- 3^ ed , mde f , tfhSen t r impre a a i Ofi , 

oi^teutreaulta(FmtliBt.Paii6d,^J^rfile,tliridef .Uo£iie,iJn5«nt.outfilB, 
full . line, genome , l, 0 . «xe, plainj 

outputreaultaEFTTitlistO.I^ailwS.Etrftie.un^ef.UtSfile.TinSent.Outfile, 

Amount , Typ^ , Exam, Ocxnpna, DocComp. MewCompno , Caller , ^rdt^coD 
t«lHOutfiU^ 

{Protocol = agml, i , Op a *sml; 
Caller = earvwr, 4 , Op - sgrolj 
Op > pialnj, 

(Typa . neaced, i , * original output form - peated f indinge 
write C ' <noste^* * > /ft*v/_lln& (Op) , 
write iPrttlifit) , nfiw_rin6(0pj , vi^ite 1 ■</iiBstecJ>' > , 
new li3ftfr(QpJ , J 

J . 

(Caller - B«rv«r^ 

wri t*_ffleii«a^fe (unfile , undef , C*ller , ' <unde£Lned> • , * < /undaf incd> ' ) 

Ca 1 ler = exe , Undef C ] , 

va-itQ_Tn^«&*34(TJft«ile,cmdef .Caller. undefined wqrda 11 J 

^ %vriti_hlghlight ( [ } ^UbSent , CalJcfl 

true 

tC«ll«r * seafver, 
write ('<nc>parfle>' ? ^ 3 . 
write_highlight lUnd«f ,X?na«flt , Caller J r 
writejiighlighti 13 . Palled, Caller) , write ( ' c/rioparaB>^ > 
i 

CAll«r - ftiee, E^rCile \= [] , Paiiftd \« 
t*ll (Brrf lie) , 

writer***** sentence a /Phrase* Mot Paraed ♦**»♦»), nlj 
%wr i te highl ight ( Ctade t ^ anS«»t , Cal ler ? j 
write^highlight ( [ 3 . Failed, Caller) 
f 

tcu« I AO Brrf lie to write to 
) , 

% fet_a2fga' Praoeaa qptlOAfii 
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% - -p ProbF-il* {otherwise def Ault is problem onw^^gee are v^ritten t9 f 

t '4 infiU (if input 1$ supplied by file ^nd not standard it^put 

t 'th Mode t<Ee£ault Is the 6 choices are tfp, model - n»de&) 

% -o Outfile Uf output should be file and Dot atandsird output 1 

\ -? Provide list of default argument s 

^ -pr Fzotocol " dgml or plain {default is plalnl 

V -wi Undefs (othenvij;^ default is - undefined mt^eages are not v^ritten 
^ to a file) 

setarge (Ar99, Mode ^ infile. Que file, Prbfile.Undef, Protocol t : - 
s« t_niode ( AT gs , r4ode ) , ae t.amouA t f Arg b , Amount ) , 

t _prot ocol ( A-rgs , Protocol ) , 
set^inf ile fAxTS^ r ^c^f il« 1 1 aet^outf il4i (Arge , out file | , 
ae t'prbf i le (Arge , F rb£ i 1 e ) , set^unde f a < Arge , UUde f 1 . 

<et_mode(ArgB, Mode) 

(nexttof ' -m< rM. Args) ; ne^tto {m^M, Args M , !, 

models lM^Mod&> ^ J . 
sst_Tnode<_,l>p) . V default output type 

mode i a Cralax^ modes) J. 
modeia i stx i «?t j ^fK^de 1) -. - ! . 
rted«i5 (»>ctpvmode4) k, 
rnodela (longest r mode 3) I, 
oiodeifl (boet.tap] i. 
rtodei< (mod^i.fRodei) i. 
models (modeZi iToi^e2) : - i . 
TiiodeiB (modea , tnod*2J : - ? . 
TOodei 9 C m*d»4 , made4 \ - ! . 
ittodeifl (modes.cnodesi i. 

se t_p rot 0004- [ Airga , Prot ocol ) : - 

(next to { ' -pr • , protocol , Argil ; nexcto ( • pr ' , Protocol . Argal J , 
meiriber {Protocol , [sgvnl , plalnl ) , ) . 
set^rotocol plain) . 
64t_undef»{AJeia.undefe) 

nextcof '^u' ilftidefa.Argfll ; nescbtofu.undefs. Arg«) , !. % undef file option 
set^undefsj^r [J ) . % default is no file o£ undeflneda cr«Ated 

® ot_iti C i 1 e iArgs , Inf lie) ; - 

nDtlvar^Infile^^ j; % intil^ i« set already 

nexcto I '-iMnfi2«,Arg»>, !; - 

naxtto (irXn£il«, Arga) , \ , 
BBt_ii]f ile [_pg<eir_lnpat> . % default is standard input 

B at_pr bf i le t Ar ga , Prbf i 1 • ) ; - 

nexttot • -p' ,Prbfil*,Aa^9) , t- nexCto (p, Prbf tie , Arga) , t. * prob file option 
aet^rbf ile^_. UK k default is no file of problems iff created 

set_outf lle(Axge,Qutf ile) 

nonvar(outfile) , i; \ Outfile is already set 

nextto( • -o* ^Outfile, ArgsJ , J; nextto (o. Out file, Arga) , I. * out file Option 
Bet_outf ile<_,u3«r_outp«t) . default is vt^ndard output 

naw_liTi» ($^11 . ; - wite ( ' <br> • } . nl. 
neii'_llj:ie feerver) j - write ( ' <br > ' ) , nl r ? . 
new_line taxej nl. 
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%/!rite_maBBAg*(_, CI ,exe,_,_) 

writs []ine»aage(_, I] ,piainr'._^ ! - 

wr it«._iti&egagQ < Fi 1 e , contents , Ca 1 1 . Segmeg . EUdiReg } ; - 
r Rfember (Caller, [exe^plaiii] ) . tell (File] , I 

true) , 

(Contencs s Hr vrice lis-t (concents, 11 ^ nev llne(Caale2^^ 
(Bn^SiTLBg 9 [] , f ; 

vrit« (Bwam^g) , new iin.«<C*ll*r) 
) . 

Bentend UX |J , Caller) i - 



$«ttarg*t» n ignore I RflBtl , [ igiiore|liest3 ) =• I . * j^oeaibly i^ft^re info, 
gectargeta ( [WllRast] r (Ti (Treat] ) 3- 

foimdword<War_.Tl) , * target f o*^ Wl 

g«fct.Ax^etB (aeac , Treet) r I - 
gettargfita (W, W) - % not in iftxicon ^ 
iBneg<Xf 

int:»r«e«t fX, [n&,negativQ>denyr ' i^nile oot '3 > . 

iteoutaent < [Kosd { R4S t}) \- 
tfrit«<'^''), write iword) , writ»<'»"), !, 
{Word » writeC'''}^ j; true), 

(Rest \s []r writ«(*r')i wrlteouteent (Et^aC) . 1 7 . 
true) r I . 
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\ This file cont&ins predicatee associated with dOML tags 
% ne X tTag [ •»!/ , Tag , - PrcTag j - Pos cT*g ) X s 1 1 we 1 £ 
t 1/ la the fttekrting Liet. 

% Tag is ^ tag; it <;ould be a variable or InstAn^lated ft I ready 

% PreTag DOrtion q£ I. preceding rag 

% PoscT*0 ±s portion of l foliowing Ta^ 
wxtrag {Lr T»g, PreTag . PosbTag) j - 

appQna<P5ceTag, [ ' c ' ,Tag, » > ' |P«ri5tragl , h) . 

% cndTvg (-rL, +ragr -Pre « -Post) Is true if 
^ L is th's stArtir^g list 
V rag ia the SGML end tag 

% Pre is the portion of L preceding the etid of tag 
t P^^t la the portion of L foil owing &be end of ba^ 
enidrag {L, rag^ &re^ Post) : - 

append((Pre, L'C, '/',Tag, . Postl .ij . 

% enclosedPart(+L, +Tag, -Enclosed, -Post) is true if
%   L is the starting list; it is assumed that L is a portion of some
%     list that follows a begin tag - i.e. '<',Tag|L
%   Tag is the tag
%   Enclosed is the portion of text enclosed in tag, not including
%     end tag.
%   Post is the portion of L following the end tag
enclosedPart(L, Tag, Enclosed, Post) :-
    endTag(L, Tag, Enclosed, Post).
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% eilii usafwl.pl - IcKicwi lookup «nd utility toola 

: -vnJcnown fail > . 

5 -dynamic (aentence/i) . 

opisOO, fy. Cnotjonce] ) . % aame priority and type \+ 
opC?OOr 3<-fxr t\=.-=}). % same priority aiujt typs a* » or a- 

% useful, pi PebruAry 21, i592 

k bra^k«tt lexical phraees and reirove words/phraees in 

% s]^£clal db of noise w&rde {nosem in Asphrase^pl db> 

t S is original eentenee 

}t B41 ie prepz-oc«s$&d aencencB 

t U id list £>f undefined words in sentence 

K Hoda is mode of procMs - ta akip mode vitdeCinsd w©irda ar« removed 

% from. pr&prDc«&»ed sentence 

preprooesB (SO^SdX.Uf $4m3,iir&de) • ^cfnew 

chcc)<b<5g(5C^,fi> , % if Jprgimiing ia 'Al ' ignora 

chtichphraflfifS.Sl^Senjl? , ? bracket all phraBea in ptaraaal lexicon first 
checkllBt (SL^UlrBs, 6ftTTt2,MQdeK ft che^:k that all ^Ox6b are in lexiconr Taraove 
non semantic 

checklist JB^rtJ, Bsl, SemJr Mod*) . * check f^«r phrases after non*s«<n ajre Yemoved 
t jipp-end ( setfij. . Seni2 j S-eml ) , 
Vappij^d J Serol r S«?n3 ^ S wnllfit) , 

Vunlon(Ul.U2.0J . 

* found ch^Cika if word x is dttined aa a single word, if X st^rta a defined 
k pKraea 
foundword [x) i - 

fcwod«ord(xj 

»defdniki<7n ff<MK tagged input 
f own^iword t>:) 

phr(x,_^,_,_) . 1. 
f oundwor d ( ? X | RiiS C 1 ) = - 
R«frt \- C3, 
phrasAl (X, [X [Rast] , J , ! . 
% i/S9 added foundwor^ to search tha seniact.pl litaticon 
% phrasal u&ind; qeic^ waa added to utll.lp 
% found/ 5 returns aamantic catr of word 
fOU«dwQr^<X,Senii) t- 
wdef {X*S»inr__) . 
foundwor d , B em) ; - 

»ftTrt«(X,Sen!,_,_) . 
id«f inltid!) from tagged input 
fowndwrd ( JC / J9 am ^ s • 

pbr(X.&«rt, CJ, j. 
f oundword ( tX, | Pest] ^ Sem) s - 

pnraaailCx^sem. [x|RQBtJ._J . 
%- fovifld/s raturaa aetnantic CAt. and target form 
foundword<X,S6m, PonnI t- 

wdaf tX,3^, Portn^ . 
f oundword tX^ S*rt, Poitift^ ; - 
s«mw (K^ SeRi, Form, . 
Vdefinition from tagged input 
foundwoi^^ (X, fi era f Form, 

phr{X/Seror [] rForoi^ . 
fOttiSdwordf ixjteBt) rSewirPortftJ s- 
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phrasal (X , Sem, tX | R^sc 1 , Torm) , 

^coLIectcemC+Vord, -dsTn? : is the J- let of' semantic cl^se&a corresponding 
\ tQ word 

t iftis«ing checks if a w&cd pireicent in A ^encenDe is defined 
miesixigix^ !- 

meinberfX^S) ^ 

not foundwrdCX), 

k c!jiec:5tbegUS0, -S) chficJca beginning ftentence? if it l>egin« with a letter or 
It number followed by a ■ ) ■ , that pAtfC Is ekipp^d 
obeckbe9((X. ') ' |ReBt) ,KeBC^ t- I, 

% chec)cB every word In a liat to flee ±£ it is defined,- cirtAtes 
^ ^ Tt«w lij^t of lorde not defined, «)nd a n<w liet oC sentence 
%■ rfhere phrases arw lata CKe ted . • 
checkliBt ([] . [1 , tl, ID ,J . 

t If X is a li^t it haB already been identified as a phra^pe in phr«»al lex 
«h«ekli*t ( U jRest ] , Und«f , Ke wrea t , Semlist, Mode \ : - 
ia^Liet (X) . 

chQGk_no_6emf [x|Reat] ^Rescl,_) , 

che^klint (Heatl, Undef , tT«wreet,seniliat,KddeU i. %-is phrase part of noaem 
ohftckliat ax|ReBt) rUndeJ, [x|WevreBtl , Semliat ^Ffede) 
^coLlacts«ni(X, SemJ , 
is_liat(X), X =v [wilTail], 
phreeai twi,Sem,X,_) , 

checkList (Rest , Undef , Newreat ^ 3cin2 , Mode } r I r 

appMdC C€en] rSemZ, Semi i at) , 
Checklist < [without | Rest] , Undef , Newrcst . SciwliBt, Mode ? ; - 

checkLlat( (vith,no|ReBt3 jUniSef rl^eiA^i^eBt.. Semiist,Mic>de> . 
% this problem has to be fixed in preprocessor 
t check tat a number with a ■ , ' - "Ll«20&" and fix it 
VcH^ckiistt [X, ' , Mf|»c6tl .undef , LJr|Mewrefltl , Inmiib«r |SewliBt) .Mode) 
V number tK>, ti««iber(y) , h is X * iflOD + Y, 4, 
% checklifft^Resc^undef .Hevrrat. &emlifit^^Socael , \ . 
t cbeck for a literal nuciiber ^cfnew 

crKftckliat ( Cx | fteatl , UnAnf r IK | Hewreet} , (nurnber | Semlistl , fitode) ; - 
number [X) , 

c:hei?kliat [neat, undef,tlewrej(C,fiemliBt, Model . i. 
% beginning of ixiat is a prefix of a phrase chat 1b a coital ate fS.nding 
Cfheckliat (liistrtlndef r C^hra«^|Netfreatl , [cf indinglSemllatl .Mode) r- 

checfc_BQm_^f:indlng (Lis t, Res c, Phrase J . 

ch«ckTi«t7R«etfU!ndef jHBwreBt.5emLisC,M'=>d^?) , • . 
% be^innie^ of List ia a prefix of a phrade that ia in jifteemantio lexicon 
iSfteckliBt f Lie unde f .Wewreft, SeTnllBt, Mode > : - 

check_no^3esii(Li*CrReat^ Phrase ^ ^ 

checklia t < Hes t , UMe f r Newr ee t , ^eviil i 9 1 , Kode > j K 
* beginning of List la a prefix of a phrase thfl.t ia in phrAffal lexicon 
chao^iiet (List, Undef, [Phrase [Ke>.sri-eat] /SemLi£;trMiOde) 

get_lQngest_a6m [Liat, Rest, Phraae, Semi , 

t Che ck_Bein( Lie tr^eat, Phrase, Sem^ , ^change to gst longest phrase 

checkl 1 at {Raa t , Uride f , Ne ea c , Sem2 , Mode ) , i , 

append (Sein^3«Tn2r5emliat J . 
%r beginning o£ Li9t is a single %^OTd that -is in sernantic lexicon 
checklist ( {X iRestl , Undef , [X |Mavre«t] , senil ist .Mode) : - 



46 



BNSDOCID: <WO_0063687A1TI. 



wo Oa/636«7 



coilec&sem (X , seen} , i , • 
tfDuniftword (X , Bern) , ■ , 

checXLlat (l(«at, Un^Sef.N^viyeat, 5«m2,Hode^ . i, 

append (Seen, S«m2 , Bemli^t) . 
& beglnniA? of l*i%t Ls an undefined vord 
chockl ia t ( [ X I Re« C 1 , Urtdsf a , Wr e s t . 5eml 1 s t , Mo«i a ) : • 

ch«<kl ia t i Re^ t , O^iS e f r Ni^wrtet , semi i s t , Mode 1 , 

(menibar tS^undef 5 . ! .• xjhd^fe = (X|imdefl. !), 

<Mode = sSeiPi ). Nreet = Wewreat: 

* if begicmin^ is a ntamber followed by a . followed by « tWft nuinber 

pK«ckphraae(Cx, .1 , CX, 1 , []) J. 
chec)cphraae HX, . . Z|Rest] ,y, Samlist) - 

nuTntaer ?X1 , not (nwnber (3M . ch«ctephraafl (Pe«t , Semi i»t f , i - 
% bcgiftrting of List is a prtflx of a phr«ae that is a coraplat^ fliiding 
% 01? A phrase in phrasal l&xloon 
checkphraBB^Litft , (Phrase [Mewrest J , Semlist) ? - 

(ch«i?ii_eeni_fliidingtList,llftfit,Phras*]i , fieiti • tcf irtdingl ? 
9«e longeet BQin(lri9t,Rfietj Phra««,Sein) 

>. iT 

*chBC>c_»cifi ( L>i a t , Re B t , Phrase , Sera.) 1 > I ^ 

«heeK!?hra e e {Res t , }/«wr« $ e , fifim2 ) r I . 

append ( Sem , acmS r S«tnl ifl t J . 
chflckphraae(fHlft<etl , Cw|KewresCj ^Semlisfc) 

chec)!pbrsBe tRefltj,J?awr»»t, Semliat) . 
chtcJcphraaei; (I , CI , 51 5 . 

check_3errt_i inding ([W i Tai 1 1 , T« i 1 , W) ; - 

ff = [Kl|i^eat). * X" ia brack*ted already 

chack^sem_f iniiirtg f I w J Ta i 1 3 , 5f ioA I r Pliraee ] i - 

a em f inding_0 »rtt , Phrase r^'t , 

bagauijliafttPhrase, twlTaiU ,S«lnal) , i . 
aem finding_9eatf_r_r_) fail, 

% check_no_sem(+Sent, -Rest, -Phrase): removes Phrase from Sent resulting
% in Rest if Sent begins with a phrase in nosem (non-semantic list).
check_no_sem([W|Tail], Sfinal, Phrase) :-
    nosem(W, Phrase),                   % phrase beg. with W that should be removed
    begsublist(Phrase, [W|Tail], S1),
    remove_comma(S1, Sfinal), !.        % remove ',' if it is next
tffet_lDngeBt_BBiD(+£l*nt, 'Rest.-PhyasQr-Sew) : Phraao ift longeat phraae th*t la 

* a^pralix of fl^rtC; Heat ia rensaitidca^ and Seen ia List of B«mantic claeeea 
gBt^lort9«at_«ew{Sent , Heat, Phrase, [sem] > ; ' 

flfttt^f tK,check^aeqi(S«tt,Xl ,Li . * 5*t of Ptirasca 
maxphraaetti, Phrnff^. D) . % Phraee with (naxlrmim length 

append{Phras«.Reftt,Sent) ^ * ifeac of aetttence after Phraa» 

f oundwordfPhraae,SeniJ . 

% check_sem(+Sent, -Rest, -Phrase, -Sem): checks if phrase beginning with
% Sent is in phrasal lexicon; Rest is the remainder of Sent after phrase
% Sem is the semantic category
check_sem([W|Tail], Rest, Phrase, Sem) :-
    phrasal(W, Sem, Phrase, _),
    begsublist(Phrase, [W|Tail], Rest).
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* thij? also obcaiiis th© Target form 

ch« ek_B Bin < I T»i L 1 , Rtet. , Phz-^^e . sero , Tar 9 et ) i- 

phraaal Seen, Phrase, Target) r 

bwgfaubllceCPhraBe, [K|Tdil] .R**t) . 

isJlistW , icncLcpBed in brackets Tneans it is a. phrase 

chei5ic_*eft> ( [ w I rail 1 , Tail , w, sero, TArgetl : - 

is_Liet(l0), %eAclosed in brackets maans it ie a pl^raae 

phrasal (tf 1 , SeEn, M , Target) . - 
% check_sem(+Sentence, -Phrase) is similar to check_sem/4 except for fewer args
check_sem(Sentence, Phrase) :-
    check_sem(Sentence, _, Phrase, _).
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% file utiX.pI 

%%%%%%%%%%%%%%% Utility Predicates %%%%%%%%%%%

% fail on unknown predicate

ctp<900, fy. I««>t,one€) ) . % same priority afl^ft type X*- 

op {700. xfx, (V"*-*)) . t fiatne priority Aod type » or 3= 

dynainiDtwfsC/*) . 

dynamic (^.ddat&cal /I ) . 
;- dynmii?(paragno/i) , 
i - dynp-mi c ( s« ctno/ 1 } , 

dynamic <phr/4 5 . 

% wfac <*Rwl4r+Wumber,+Ae3r+FtTitj *60, fSl : vell-£ormed ayroJwi table 
t nulfi is the oAfTj^ of rule; Wumibet' the apfcicn number 

1 Rea is 5 succaas and f for failure 

^ 7wX ia the format (for swcceadea) ; for failure P^t ia [) 

% ie the senteciCB poslbion at tho «t&rt o£ ftuL« 

4^ s ia the dent^nce position wh«n mule baa b4«A coniplatvct 

V add to vifst 

AtfWlftt /aiile,Ntonbarrfte3rFtflt,50. SJ 

\* chackat (Rule, Number, Sea, Fmt, SO, ireault for rule w** aaved already 
\4 chccXst (Rule, Number, i.ftnt.&O-S) ^ » TC»ult from diffeYant rule aav€d 
( cfhecket (Rule, ^, Rea, Pwit, fiO, Sj , % different rul« pi^oduced B»n« raault 

aaeert (vfaMKulCrNUniber^i.Fnit, &0,3l > r 
aaaert^Kfst CBul&^number.Res. Pmt. sOjS) H . 1 . 
addet :- !. * nlways auccaed 



Sr chBcfcat , -NUfflber. -Res , - Ptnt^ *S0, - S J : 

V art wfst 



chack» to aee if rul^a baa bc«n saved 



V fcesllet (L, V} - is Y th9 h*Ad of liat L 
b«glist X =. V . !. 

% splice(+L1, -L2): L1 is a list of lists, L2 is merged list
% NOTE(review): reconstructed from a damaged scan; the two-argument
% append (list-of-lists concatenation) is what the surviving text shows.
splice(L1, L2) :- append(L1, L2), !.
tcplicfet [I , Cl> I- 
iepllceniJl r []) I. 
kaplica^ (XJ .X? I , 

*Bplice{[I]|lil].!:.2) !- eplicatlil,L5K ! . 
feapliceami ILI] ^LS) 3pllca(LL.L2) . • . 
%9pLlce{[X| tCJ]l ,L) splice fx, L) .! . 

% append (Li, 1.3 ) , 1. 

%aplic«f E}{ii;»l] ,h2) 

* <plic«(Ll,L3( , 

^ ' append (X, L^r LS) I, 

^-^pliccrel • ifforlca *fith r«latiQna which have Argi , . , . , Argn. 

It splices a Splicelisc in each arg of relation 
spliceral (Piadin^r Spliceliet , SpiicadI r • 
splice fspliceliat * Spl ) , 

(Findings [r«l,X)R«atI . splicearg? (Reat , Spl^ «p) # 
%Bplica< ( [rel.X] , &p] ^SplicedJ , i r 
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append ( Find Ing , 5pl . Spl 1 c^ed ) ) . 
^spli<=earg« - Splicss a ligt. lAto edkCh elBmenC of a list 
^^liceargs t [} 1] 1 : : S . 

epL Ice arg^ ( [ Arg 1 1 R« ert ] , 8pl icel la t; « fipl i.c«d } i • 

lrspiic« ( lAif^l, 9pliceli«t] .SarQfl) , 

appBtidUrgi^spliceliet jSargil r 

spliceargs (Rest .Splicelist. Brest) , 

t^plice ( [ [S*rgi] , &ra&tl , Splicedl . ^ 

append ( C^argl} , Bt^^t , Splic^tf ^ . 
lifttdl , [D) . 
list([Xl (J) .X) . 

list ((X|I.ll ,li2) list:(t.L.LiJ . 

append ([X] ,L3,L2) r ! • 

^ scrip (LL, L2) removea extra si^uare bracK«t« from L 

% difflist(+A, +B, -C): B is a suffix of A and C is the difference
difflist(A, B, C) :- append(C, B, A).

% begsublist(+S, +L, -Rest): S is a sublist at beg. of L if there is a
% list Rest, which when appended to S results in L.
begsublist(S, L, Rest) :- append(S, Rest, L), !.

% ch«c)5 5 tKat first alemont in ISet 9 hJifl semantic category in Semi is b 
f iiffltwora { [Wl I J , 3vnli«t) : - 

atGui(Ni.], wdef (Wl, Sem,^) , t le^mantic category 

TTieonber [&em. semlistl . 
£ir9tword< ,3»miitt) : - 

is^list (Wil r phrsflal (Wl, 9eiin,_,_> , 

m^StoeJf (^era^semliatj . 
% removes phrases from first arg that are in nsphrase - lexicon of non-sem.
% phrases
remove_no_sem([], []) :- !.
remove_no_sem([W|Tail], Sfinal) :-
    nosem(W, Phrase),                   % phrase beg. with W
    begsublist(Phrase, [W|Tail], S1),   % remove from sentence
    remove_comma(S1, S2),               % remove ',' if it is next
    remove_no_sem(S2, Sfinal), !.
remove_no_sem([W|Tail], Sfinal) :-
    remove_no_sem(Tail, S1),
    append([W], S1, Sfinal), !.
% remove_comma(+S, -S1): S1 is S without a leading ',' token;
% a list that does not start with ',' is returned unchanged.
remove_comma([',' | Tail], Tail).
remove_comma(S, S).

% remove_sem(+Sent, -NewSent): Sent is the original sentence, NewSent is
% stripped of all phrases that are defined in lexicon
remove_sem([], []) :- !.
remove_sem(S, NewS) :-
    check_sem(S, Rest, _, _),       % phrase in sent. is in lexicon - remove it
    remove_sem(Rest, NewS), !.
remove_sem(S, NewS) :-
    check_no_sem(S, Rest, _),       % phrase in sent. is in nosem list - remove it
    remove_sem(Rest, NewS), !.
remove_sem([X|Tail], [X|NewS]) :-
    remove_sem(Tail, NewS), !.      % not a phrase, process rest
% remove_words(+Sent, -Newsent): Sent is the original sentence, Newsent
% is stripped of all words that are in lexicon
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r enwvc^ wo xds < [X | S« st ] , wBe e t ) : - 

r<fc7undwordU) ; nurnfaer <X I J , fe S >e defined in L«Jcicon 

rETnoVe_WQrdfl(Hfi*c,N«w) . HewReet » [K|wewJ, s V X is not in lexicon 

). 

%iviBxph r aae { * lilstofi^hraa &s , ■¥ Maxin / - M^xOu t.jTii bMeiscijen ^ 1 9 tru« i t 
& LiBCofPKraae i« a l±at of multl- word phr^^ea^ 
% Haxi-n ia phr^ise vlth iMiKitcoiM ii»rdB &0 far 

% Massout is p}ira6e wieb naxlmum len^toi of phras«s i» bis Phrases 
% initMaxIoen 1b length of initial pliraae wh^ch ia of tnax- length 
maxphrase I [) ^Maxin^Naxin,^) :- j. % no moro {^hv'&sea - raaximuvn is it&mB as maixin 
Tnax£>t%ra9« f [P | Reet ] r M«xin , mJ^C/Out , ini tilaxL«A] : - 

length (P, ijQn) , % length of first (ihraee 

( Len ^ InitHaxLen , i , maxph r&^e ( Res t ^ p , Ma xout ^ Len ^ ; 
hen c initHaxLen , { , tndxphrase (Ke«&, Maxin , Maxout , In^ trtaxL^n) 

} . 

%%%%%%%%%%%%%%%%%%%%%%%%%%% lexical interface predicates %%%%%%%%%%%%%%%%%%%%%%%

*AC:Clftxf&<trtl,W.SDrS) J- 

* output£orm<htexfe^ , ^ acclexi <5«in, W, so, 5} . 
acclejc iSem.'KrSO, &J i- 

^tcL^K (Sfem, W,SO, S) 

acclexsfl (Seiri, SyftrTargec^ Features^ SO, S) . 
% check lexicon for word or pJjT***, Target fi^irm ie original W 
jicclifcxKp, tP,c] , [Wlfteat] ,ftc$t) !- 
iB_^lLafe{M| , 

f ind_iJCirti_phraae [F.Cl ,W) . 
acciexKe, IP.C] , [w|S] ,S) !- Atomfw). 

«def (w.p. {P.CD. 
acclexi<SeTn, tWj , [w jRestJ ,Reet) ? - 

is_liat(w>. «^1£ bracket «d listr s«t ^etn and Codv from phr««al lexicon 

f ±nd_sem^hrae« < Sew , _ , . 

acclaxl{5em, Wr tw|s] «Com(W) , 

% ciiec)c lexicon for wrd phxase^ T-ar^«e form is- tJlK«n from l*5tlcon 
Va eel ax 2 (Sett!, Code, [w|Reet] ,Reat> 3- 

* ift_ltat{wK Irif bracket*^ list, get Setft and Coda £rom phrasal lexicon 
% f ind__B em j)hras« ( S€m ^ Cdde , M ) . 

acclexa (Settle Cod*^ IM|S] ,S) ; - f omidword iW.Sem.Code) r 

nOTiv*!* iCoda) , * prot«ct again et 

lex. err OAT 

% tini\ a phraae [1f|T«iil] In lexicon that b«gina with V «nd haa category 5cm 
f ind_Be!ni3hraBa <a«m. Code. [V \ rail] ) - 

phras«il (if^Sem^ [w[TailJ , Coda) . v plirase and code be9, with H 

nonvar^codel . 

% irhere phrase ia already br a eke ted, looK \if> phraae 

sert_finding_phraBel (Coda, [MlTaill .Tail} s- 

ia_Ii3t(N)r %phza»« ia bracketed 

f ind^setn^aant (Code , , 
nQnvar(Ccd«) , (protect ag^ia^t lexical tirror 
\ caae wharv phras« i« already bracketed, look, up phrase 
aem^f indiug^hraae^ tcode, [WITailJ , Tail ) : - 

is^liat Cw) , tphxase ia ]|>^acketed 
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l!lnd_sem_eent <code , W> . 

noAvar tCod« ) . %prot©ct asaineC L^xlcsaX. crros^^ 
* Phrasal agccaada if Lexicon c<>ixtains ptiraae 
phraaaHKl^sem, Phraae.coae) i- 

phraae (Wl, Sam ^Phra$e, Code, ^ittulti- v;ord phrase in lexicon 

^ihir fwi > sem, I>nr* & © , Code ) : - 

sentp (WL , eeoi, Phrass , Code , Features) , 
% lexicai d«f inlcion from mariccflt up input 
phr*4!Al rwi^Sfem, [Wi I Tail] .code) 

phr (Wl, Sem, tail f Cede) ^ 
acclexsa (SerTi^Syn^Tar^at, Fi^a&ureBj (v|s] ,S) :- 

atoaifwl , 

&c™(w, sem, Target. Peaturea} , 
synvf(Wr SynclABs) , 
member I Syncfla^i , SyrtJ . 
acclflxsa (Seen, Syn, Targe CFeAturee, i- 

f ind_phra b bss C W , , 5yn , Target , Fea Cure s > . 
Slnd^hraseBS I [wi | Tail] , sv^. Target, Feat-ur^s > ; - 
5«fTQp<wirSein, [Ki|rail] , Target , Features) , 
synpiWl, [Wi|Tail] rSynclasi) , 

t lexical definition of a complete fiddiA^ 
f ind^Bem_aent (Code , [W | Tail) ) : - 

sein_f indiiig_» ert t (H , [ w | Tall ] , Code ) . 

liatify(C, tc]) :- 

atomCC} I I . 

ia_llet (C) , I . 

% distributes l«£t mods and right moda over li«l: i>f f Indinga cr*«CiA9 
% ll^t of llQta of Cindlnga with mcds 
diatributemodat [] ^ [] ,^,_,_) j- I. 
dxstributeniDdatDiat^ [Dl |T« ill ^LMQds,l%modB, Type) 

- di»tributemo49^Diet2,Tall,LTnadB,ftm0^e,Tvp«] , tdiacrlJ&uted for remainder 
nierseiADdB (Lnhoda , Rmode^ Allmods) « 

frame (D, Type, fli.Allnioda^ , tType framB with mod* 

app«n4(CDK&i«t2tOiflt) . % CocnbiM findings to get li^t of findings 

% fixconj - if leftmods has [certainty, no], and conj is or, change Conj to and.
% no A or B = no A and no B; 'denies A, B, or C' is similar.
fixconj(Leftmods, Conj, [rel, and]) :-
    (member([certainty, no], Leftmods); member([certainty, deny], Leftmods)),
    Conj = [rel, or].
fixconj(_, Conj, Conj).

% write^Benteucea/l input* a PROLOG list and prints out linee 

^ vtiLcti which or* Ga^lisrh e^nteneea . Ho wrapping is done. 

wite^aentencea f [X] ] write (x) , nl. % specfial aOTtence - section nam© 

write < ' <p/> ' ) , nl . ftr paragraph nwifK 
write^sentencea ( [x|Reat] ) ' 
uppar_first((X|Reab) . [U|Rs&t)) . 
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write (u) , \ Firet letter of f tr&t word made upper c*«fc 

IX - chkforpunct(D,Rest) , I, write_terma (RestJ ; * no Space needed 
wrlCe<' write t«rinslRe5t> 

I . 



% write_sentence/2 inputs a PROLOG list and prints out an English

% sentence wrapped. Idlen is the starting position of the sentence

% in the output.

* uSJea llbrarl.^5 <ttypeB/ te*fiitc, not 



write_safttence ( [XlRest] , Idlen) : - 

upe>er_£lrHt < IX I Rest) , [0 1 Rc*t ) ^ , 

wrice (u) , 

name <u, LUJ r length (LU.LJ , 

= X, chk£orpunct<ar1test} , !, "wriCe_terms (BeoC, Li4ldlcti) r 
write (' writ«_t*rw« f»*ac, L+rdlan^l) 
1 . 

% write: list inputs a tROLOQ liflt and priftta out a 5*titence li)s« liat- 

t vrapped. Idlet> i5 the etarting poftttion of th» tiet in th« output. 

vrita^liBt U-S|R«5t) , l<31en> :- 

name (JC. LU) . lengch (LTJr I*) , 

( chkfoxpLui^5t<X,R«t) r wr±te_termB (R«»t, L+rdlen) r 
vrite I ' ' ) r wyite^terma <Rest , L+IdLenvl } ) * 
%arite_Li3i: (+LiBt, 4ldlen, - I4tl«nout } ^ 

% writi_liBt prints o«t a sentence like li9t with wrappitig if tlac«S**i^y. 

% List; ia the liat to be printed 

^ Idlen ie the coluinn poaitiQO at a tart 

%^ Idlenaut is the Golvunn po a it ion at end 

write^list ( ( 1 , Lea, l*e»3 .• - ! . 

wxitOi«t i {x|ReBC5 , rdlen.ldlenout) f - 

aEoniic<X) , write {Kk , 

nametX.LtJK iAftefthtW,l<) , 

(L Idl^n ^ 74, nl, idlen:^ = 1, J; 
Idlen2 » 1/ + xdlen, I 

) , 

tchkf Drpunct^Xrfte«t^ , wrlCB_List (Rest , Jdlena, idlenout > . ! .- 
vriteC vrite list {Rest, L^Idlen^+L, IdlenoutV ^ J 

>■ 

tB_liflt<X? , write_li*e<Xr Idlen, Idlan2? , wrice_liBt idlenS, tdlenoutj . 

upper^f iri»t ( U(Heetl , CtilReetl 1 ; - 

(lfl_alpha (Tj) r Up ±B " 12, i } Vp » 14 , 
name (Ur [tTplfil ) , I . 

% vrite_terniB/l writea oat a word £ol lowed by blartK, except punctuatipn* . 

wx-it«_tferme fl] ^ : - i • 

^ caee where X ia end of StfnCeAce 

wrlte_tarcnsHX|ReBtJ ) : - 

= ' . » f X =s ' ; ' ) r * last word of eftPteoce 
writtt<XK t^Xi write^aentencee CA««t) . !. 
% caBe vhAr« K la incerior'ofi sentence 
wxite.Ceme i [K|Reatl ] : - 
write (XI , 

(cUcf ojrpunct (X.&eat] , writ«_tenna (Re&t) ; 
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writer* '), wrltu terms IReet) 
), I. 

^ writB_ternia (Llst.Used) s writes the terms in list and counta the nuittoer 
S of column^ used: starts a€w line if 75 c<^lun«iB hav« been used 

writfli_t«nns I [3 I. 
% At ^nd oE list. 

write_terTnc ( [ . I , _> =- vrit«^ nl . ! . 

write_tfirm« ( [ ?] write(';'J, »^t.J. 

^ X is a punctuation, don't add to finai count 
write_tcrTftB ( £X I R] , Ussd) : - 

( R - []r writ«<' '), wifite(X) , • 1 f 

chkforpv»ncrtfx,R) , 

write fXJ, write_terms<RrU5«4> ^ 3 

) - 

% X La laatr word in $«ntenc?e 
wTit©_c©Taifc < [X, . ] . used} : - 

H*itte(X, LiBt) , lengthtList, Len| , 

Weed iB Xen -f 2r 

ratal Is Us«^ 4 tleed^ 

(T©t«l -c 75, write (' 'Kwrite(K). write [.) 
Total > T5, nl, writhe 'l.writeWJ, write (.)) r 
nl, ! - 

t X is last wi^fd in aentenca 
writi_t«jfm« ( tx, ; ] , Uaed) : - 

namefjc, i.iatj , lisngthaiat, Len) , 

Need Is Len '^ 2 . 

TcktAl i« Ueed + Needr 

(Total =< 75, wTitcE' ')rwrite(x}, vrite<';'); 
Total > 7S, nl, write{- ')rWritc<X), write U >) , 
nl, I . 

* X la followed by ' , • 
write^termst EX, ^ ' |Re<tl , Ueedl : - 

n«ine(X, Li»t] , length tiilst, L«nK 
I4««d 10 Ijen +2, 
Total iB used 4 Kradj 

trotal 75 r writftC 'J.writeCX), write <•,'?, 
writ«_t«ri!(ifl (Rest, Total) ^ 

Total > 75r nl, writeC ' ^ write U), wx:itife< ' , ■ ) , 
New is Need - 1, wr 1 tester ma <ReBt, NwH , 
I . 

* witCS blank + name of X, U9«d ie length of nante*! 
wit«:_te»fnfl ( [x I RaBt] , Uaed) : - 

nametx, Lieb) , L^H^thtlilat^ Len) , 
Need is Len ^ 1, 
Tot^il la Ueed + Need, 

tTotal 75, write (• write Cx) . write_tftr<na (R«Bt . Total); 

Total > 7S^ nlr writhe ' ) , wif^lte (X) , Mrite_t«niia (AeAt, LenJ^^I. 
write_termB( |R«»t] , Used) :- 

nametx, Liftt) « length {List ^ Len) , 
He«d id I^d + 3, 
Total i9 Used 4 Need^ 

iTotaL --s 75, write C 'K wtite<XK write! " '^"J , 
wr i te_ te rma ( Rest , To tul ) ; 

Total > 75, nl, writetX), write_term5 CRest, Len>>rl. 
% processes sentences in Infile; writes formats to Outfile
% Benbenbes beginning with ^4' are treated as CDimiBntji 
t^staeota Unfile fOutf lie ^ 
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seean^ile^, seen, see (In* tl<> - 
tell (Outfilcl , 

aQB{rnfil«)r asen^ told. 
% z«9^s. n&xt 4tezitence and proceeass It 

(X =%rtd_of_f ile, !/ 
X = i^off , ' , ' ] , ! ? 
X - •] . I 

X « readteatSf * don^t proc«4 comment*- 

preprc>«£ae (x^ bb, un^ef . Semli at , akip^ , 
< Undef = LI, 

doeent i X, BS , J,ls t ^ P^ftt, KeBsage^ iwpY eeaion , M, ch«etxxay , strict , 0 } . 
writ«_4en t ence ( X , 1 ) j wr it < ( S5 ) , nl , 

Uftdef \= 13, v»ifitft_afeRtenca<x, 1.) r wr±t«(BBj. nl, writ* fUndef I , nl) , 
readCd9t« % read next sentence 

)- 

% Reads in all sentflnc^s fir^swi inpiit f ilft and creates ^aie List all wntencea 
^et__i n^utaent a ( ^ re i * t , Tok 1 i at ) : - 
" read in [X} , 

tX =%nd of file, ToJclist - Pyevlistr !; 
X * [eoff,"^.'), Tokli»t * Prevlist. 
X - ['n. TokltCt - Prevlist, !; 

aastr'.XJ. append (TO); li *t, [■') ,X1 , I; %reTnOve 
Append {PrevllB&^X.Kewl 1st > , 
ge t. inpute«nt S (N^wl is c; . Tok ^ i 4 1 } 

^get^aentence (+A. -B, -C) 

% Gets naKt sent^rxe from input list contalnir^g all sentences r«ad in 
% Don't eod a sentence if «,« la preceded by a number and followfed by 
ft A nuirtoer and unit imaafrure - \.%S cm, l.s cmj .& cm 

or la fallowed Joy » which i* p*rt of abtoreviation 
% get_sentence(A,B,C) - A is list of all sentences in report.
%                     - B is list containing one sentence

%                     - C is remainder excluding B

% agml tag for multi-wcsrd phrase containing * . ' that Lb not end of sentence 
get^sentenw ( C " < ' ^phr | Tail] , 9«rttence , Ijloeti s - 

«nclQsedPart (T«il,p5ijf,BecveBn,1lenft^^ % Betwen beg. p»±^t of ope« piir and 
Olo*4 tag of phr 

append { [seen, « r "" . Sem, ' 1 , MQteAttribwtt * , Between) , *Sem 1« value of sem 

•attribute 

xHor«Attrthutaa = C > ' 1 S^l^^^^^^J • Tars^tLiat = Phr^s-s, U 
HQireAt tributes - Ct,-, • " | l^^rgetPlua ) , * Target terma plua end of phr 
appBnd(Tar0«tLSftt, I "",'>• lPhr*3fe) .TargetPlttfl^. i * t attribute followed 
by actual phr««ft 

>. 

phraae ^ [wl|Rest}r 

append ( Phr a Be, 0RS£t, Sentence) r 

cc^nca C_a torn C Targ « CXiis t , Target ) , 

aaaerciphr (Kl.&em. Heat, Target , «r assart lex def ftecordin^ to inpiit 
%Phraa9 tWl|PIlest] ^ 
% abbrev < wi , I wi \ PRe a t) , Target . _ ) . 
get_5en tence { K^em , SRes t , LR< a t ) ^ 2 . 
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t Ignore sentence Btarting with , get next s«Titcnire 

" get_sentencc Cfteat,_,ii^ro) , 

get _5 en tence* C Raw , 5 eut , R«*fiainde r ) . 
get_senCBrtee{[X, .rV,Z|Reat] . [X, .3 r tV,"s(ilest] > % break up "140. 3*" 

" nucnbferiXl, number (Y),. % Y belongs bo *or new sentence 

get^sentence ( [K, ZfReat] r [MjSReet] ,ljRe0t:> ^ L,S cm 

number W , number lv> , 
t(wdef (Z,vinLt,_) ; z - jc) . 
a \= % br«a)t up "140. i+" 

I. 

ti«Lrtie (X. n f ^ name<.,D2>« name [Yr 03), nacne l'B-i.0O' ,X>4) , 
append n]>Ul>2,P3 J m 1 fD) , naiostl7,D) . % put aunber tagether 
get_sent«nce ( C^jftcist:] ,gRsst,l<ae&t) . 
^ <»?f«tnc>n abbrev 

get_Bentencenx, . |R«*t] /fxISReet] , LRftAtI % aljferev endixtg In "." 

% liat of concpon abbrevlablon& in reports should not «nd sentBQC^ 

cncinb«r(K, [va.dr.cm.mg;] \ , 9et_fieAtence(Rest , ^ReatrLResty , ( . 
% Liat of start of nairtftSP in report b ahOuL4 not end aev^tence 
get^aentence t [X, . |Reat] , [JC|5Reflt] ,LR«at) : - » abbrev ending in " - " 

Tneinl>er(X, [wflf .mr .Tnre^dr^ab} > , 

SKLpname (Eest^ReBtOK ^ skip naina p»rt 

get_Bentence (R^stO , $Reec , LReat) , i . 
% tnore Known abbteviationa 

g« t_s«n t ence ( twi | Rest J , [ ftap 1 8Re s t ] , u? ea t J 5 - 

abbrevch)c nwi |RwtK_r Rem^Rep) , % abbreviation 

gBt_a«nt4nce (Rem^ SReaL^ LtRest) r !. 
^ possible simple jcml tag foT ticw paragraph 

^et_5 entence <['<'rP,'/S'^'| Rfcati 1 , Sent , Rain Is- ^ skip paxragraph o^rkar 

get_sentancw ftie«t^5esit, RjCTn) , 4 . 
% xml tA9 ior- sentence •<b>' 

g*t_aftnt6nce {['<^,s,'>'| Tail] , SentencB, Rast) j - 
enc I osedPa rt f Ta 1 1 , s , Sent , Rest ) , 

<la&t I ' . ' , Sent) ^ Sentence > €«iit. ! ; %already has ^ . ' 
append (Sent, [. ] , Santanc*) 
) . I . - %Rdd • . ' 

% Terminating clauses of get_sentence/3: a '.' or ';' token closes the
% current sentence and the remaining tokens are returned as the third arg.
get_sentence(['.'|Rest], ['.'], Rest) :- !.     % end of a sentence
get_sentence([';'|Rest], [';'], Rest) :- !.
% interior of sentence
get_sentence([X|Rest], [X|SRest], LRest) :-
    get_sentence(Rest, SRest, LRest).
get_sentence([], [], []).                       % no more sentences

% abbrevchk(+WordList, -AbList, -RemList, -Target) is true if an abbrev is prefix
% of WordList, RemList is suffix of WordList (excluding prefix),
% AbList is prefix consisting of abbreviation
% and Target is target form of Abbreviation
abbre vchk ( m | Re a t ] , AbLi at , RemLi »t . Targe t ) ! - 

abbrev [Kl^AbListr Target, X>cm) . % abbt«v knowledge baae indexed by 1st wi>ra 

append IKbLiat, Rem, [iffljReat} ) , % remainder o£ ftbbrev. inust be in aQntence 

iDom ■ general, i; ^ abbrev. ^ppiies to all doiaaina 
dig(ni«ln(Thiarep) r Doin = ^iarep, % abbrw, applies to thia dccnain 
ia liat(l>oim), membez-fmisr^p, Doml «r thid domain in abbrev. List 

J. " 

( ft add back ■ . ^ to 6«c)tence if it also signala end of aentence 
Rem = C] , last* « , • rAbLiet) , ReniLifit ■ C ' . '3 , I ^no more worda 
i % vorda that generally a tart a new aentence 
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REin a fM2|J, LaacC . ',A)>liist) . nwmb©r(W2, [hifl,)ier,he, sht, the, this) ) , 
R^mLiat = I • , ' [Hero] , ! 
* don * t Add " . ' back 
E^etnLlst = Rbtti 

I . 

% skipname(+Beglist, -Endlist): skips next word after "mr" or "st"
% NOTE(review): reconstructed from a damaged scan; the apostrophe is
% assumed to be tokenized as the single-quote atom - confirm against tokenizer.
skipname([], []) :- !.
skipname([_, '''', s | Rest], Rest) :- !.       % "Luke's"
skipname([o, '''', _ | Rest], Rest) :- !.       % "O'Grady"
skipname([_ | Rest], Rest) :- !.

t_€« c t Ion ( +Tokl i s t , - S«n t « , - Re a t * - S« ct iom ^ - &r inta^me , AiSdnol 
? Ttokliat. contftiAs input lletj 1st ^^ntenca sha'ald be a header-, 
» 5eotB arfl alX eentencsa in s«i7tion; $eoci<>n Ib name of -section 
% 9BfiC&nc€s at beg. of To>;li*t are ignored until a section b»*der is foxing 
$«t_««etloii(CT|Toj5li$t3 .Sents.RBBt.fiection, Prinfcodflne.AddnoJ 
% first sentence should bo section heaid^r 
crQt_&cntencfi( [TlTokLi^t] , sentence rRTy:>^4iiflt) , 

(isfttftitin^header i9Brtenee,Rseiit,S*etlotij Printrt^we) , % St-t^t^nce ia a section 

header 

Appen<i iRaerst , RTokli *t , RToklieta^ , 
g^^t_aectionaents ^RTcilcList^r Sent*, Rest) , 

(AddJio =» ! r ^ testing if input b^gifK with section- header 

Addno ^1, ! . aectnoiSectno) , iJewno is Sectno 1* 

retract all (3actn«»(_]l ^ . assert C^^cttio (Mewno> ) 
I . 

retract*ll tparagiiD<J ? . aefeert iparagno > ) * parag. section 

retrftctall (Bentno(_r} , aaeert(s«ntflO<on *-let sentence of parag. 
; % iBt sentence ia not a l^gitirnate header - return [3 
Section 13 

* get flection (JiToJtldst. SentB, Rest, eeetlonf % sHi.p till find header 

get_Bectioo((K (I , CI > [] J . 

get_»*C!tionB«ntfl ( [1 * CI # tl > ■ 

g et_a ect ion sent B tTn^KlletrSliet riles tl : - 

get_santene?ft{To)e:liBt,SBnt«nce,RTQ]Llist> ^ % one B«nt«aice 
<\*''»«c'tion__hfiader (eant*nce,_._, J , ttnorfe aenteA<ffcfi in sectic^n 
'gfet_eection3enta <RTPokLiat , RSent», Reed , 
append [Sentence , RSenta p dliBt? 

i \ fche next flection is a section he^ider - return 
• 'Tokliet. SliBt > [IK 

ceetion_h6Bder (S, R#*tar 'i'^ffQi^t clini^csai informatiort item', 
'CLIKtCAIi IWPORMAriCtJ: , ' ) j - 
(& - [c!ltnical, in foiTOAt ' = , ! . Keats = CI ; 

begeubliBt UGlinie*l, information, * ; ' 1 .S.ReatS) , 5j 
S = [clixiinfo, • t', ' . '] . ReatS = Hr I ; 
bagsublist C Cclidinfo^ • : • ] ra,R««t$) . l 
> . 

% Impression section: S is the candidate header sentence, RestS is whatever
% follows the "impression :" prefix (empty when the header stands alone).
section_header(S, RestS, 'report impression item',
        'IMPRESSION:.') :-
    (S = [impression, ':', '.'], RestS = [], !;
     begsublist([impression, ':'], S, RestS), !
    ).

% Summary section: Rest is the part of the sentence after "summary :".
section_header(S, Rest, 'report summary item', 'SUMMARY:.') :-
    S = [summary, ':' | Rest].
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so ctian^hea^JesJt'lE, Raster 'report dB*«ttptlon It&m* , 'DBsCftlPTlQN: . ') s - 
(S = [<3lft*criptioii. • : ' , .J . ^«st£ = f] , I i 
begsubllst ndeBcrlptlon, * : * ] ,5,ReatS>, i 
) - 

BeGtion_KeadertS>Re3t.r 'rfiport diagnosis item' , 'DISCBAROE DIAGNOSIS :.' ) 
S - Cfinal^dia^no^ie, • : < |Rc*tl ; 

S - [principle, diagnosis, ' t ' |Rest.] ; B • [asBDCiaCed , didgnoaia, ' £ ' [ReaC^ ,- 

5 = (tr^A^fer ,dlagnoBla, ' : ' [Rest] ; 

6 a [di^gnoaiB^ '(' /CB^ ')',': ^ {ReaC J ; 
& - (dlagnoals, : |Re«&J 

). !. 

BBCtion_headftE'<5rR«fltr ' report laboraco^'y data item* , 'tAB MTA: . ' ) : - 

S- [laboraLory.datar ' i ' |R»«c) ^ J. 
eeccloii_headBr |5,Ro3t, ' repoi^t medlcacione it«w*- •HEDrCATIONfis . • ) i - 

a = IrnadicAtiOAir * = ' l^^^t^ ' ^' 
aocciQn_he«der fs.Reet , ' report cur^nt medicationB item* , ^MBMCATrOHS ; . ' 1 : - 

$ « [current ^msdications, ' : ' |Reetl , \ . 
flection_heai5er l5,RflSt, ' report diecharga medications itara', 
•DISCHARGE MEDICATIOPTSi . ' ) - 
S a Idi&charge , TnedicatiQns , ':' jReat:}^ 
Bect±on_hfiadeY(9,R«st^ <r*port discharge disposition, itflw' , 
■DISCMAROB DiePOSITlOM: . ' ) :- 
S • [diflPCJterge, dispoaition, • ; ' |ReflCl , J. 
Bectlon_}ieader (s^RBdt. 'rcpoirt nnedicatlDnB on jidtnlfision Item' r 

S = [mealcationa.on.adiaisaionr ' ; • 1^®^^^ ' ^■ 
»ecrtio(n_hfiaderiS,Rest, 'report cncdic^tionB on crancfer it&rro' , 

8 =3 [m^diotionSf on, tranafax^ ' i ' |R<5fltl . \. 
% Procedure section: accepts either "operation :" or "procedure :" as header.
section_header(S, Rest, 'report procedure item', 'PROCEDURE:.') :-
    (S = [operation, ':' | Rest]; S = [procedure, ':' | Rest]
    ), !.

se c t lon^headfi r < S r Rea t , • report iftdicat Ions £ or pro<!«4ure l C am S ' 3Wi>r CATIOWS t 

(S - [ittd£cAtton«, for , procedure , ' : ' |ft«»c] / S ■ 
CindLc:»tipti£, for f ope ration r ' : ' iRast) 



s«ction_tieadfer {Sr Rea 'report preoperAttve diagnoeis it«m'f'PRBOP DIAiGHOSlS 

S m [preoperative, dia^nosifl, ' '-^ |Reat] , 1 . 
BacGion_hoadar(a,Re»t, 'ifepcrt admitting diagnosis item" , ' ADKITTIM 
D3A<3EMOiSIS» , 

S - [admitting, diagnosis. ■ : ' |R«st] r ! - 
aectlon_heade3r(5^ReBtj "report postoperative diagnosis item' , 'PlAGlfOSId: . ■) 

S « [poBtopBrativa,dia9D09l«, ' : ' [Rest] , i . 
BQC&lon_h«adar (S^Rast, * report physical ax»miAat£on item' , 
■I^KVSICAL EXAKj . ' J 

5 * (phy«ic«l,eK«ir!ilnatiQn, • : • jRaat) , ! . 

BB^fciCM4_h«aaer(&^ Rest, •report chief coitq^laint item*^ . •CHIBF COM»lAr«T:.M 

6 - [chief, complalne^ ' : ■ jKast] , J . 

ae<rtlon_hieadex(SjReat, 'report hoaptcal course item" , •KQSPr1^ftL OTURSB: . '> 
S s ThoaplcaljOourBB^ ^ : ' |Re0t] , \ . 
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s«i?tl<3n_headex' (S.Reat . 'report allergy it«rt' , ' ALLERGIES i . ' ) : - 

S ■ fciliergiesj ^ ; ' |fie£tl , \. 

sect ion^hfiadar (e, Rest , 'report foli^Tw up item' , ' FOLliOW PP : - ' ) :~ 

s = ffollowf, up, ■ : ' iReat] , I . 
sect ion_headflr(S, Rest, ' report f it^dings ifcctfti' , 'FINDINGS : . ' | ; - 

5 = [f indinge, ' : ' iRe^tl , !. 
aection^hB*i5cr ts.Rest, 'report indicsatione and findings item' , 'FINDINGS: . ' I t 

E •= Jipdicatlons^and, findings, : ' |RfietJ , i . 
3ectioo_head€r(3, Rt:4Pt, • repc^rt iociications a(\.t$ findings it©a^\ ' IMOICATIOHS : . * 

B - [indicatiotia, ' : * |RfiBt) , ! . 
cection^headci* (6, fteat, ' report provisional diagnosis ibeiti', •PRBLIM DIAGHOSIS : 

S - Iprovlelanal, diagnosis , ' r ' |ReBt3 , ] . 
Bection^beader 45, R«et. 'report review of syetems idem' . "SEVIBW OP SYSTEMS;.') 

5 Ireview^of .ayetems, ' i * |R«st) , ! . 
aectiQn_head&r{B,ReBt, ^report past hietory it^m' , 'PAST KBDICAL HISTORVi . ') 

5 = [past, hiBtary* section, • iKeatJ . \ . 

Baction^header l.&,R«st , 'report past hx»t*iry items 'PAST MEOlCAlj RrSTORY:. ') 

6 m [pd*t,ftvftiSical,)iastoryr ' ; ' JJliSatl . ! . 

aection^he^dear {S.Rest, 'report aocial history item' , 'ftOCIAI* KTSTGRYi . ') !- 

S = [socials hi ptcjr>'r ' ' | Rest I , j . 
sectic?n_h«adi5r(S,Rfeet, 'report p**t history itecn'. 'PAST yiBDlCAL HISTOR'iC: . '] ? 

• r - Chiatory, ' : ' |Rest] , i. 
eection_header($,Aest, 'repiMTt paat bistpjry item'.'PA^T MEDICAL HISTORY;, ^^ : 

S= lfori*f .bl4t:ory, • : • jRestK i- 
scctiQA_h&ader (S. Heat, ' report history of presTOt lllneea itflcn' , 
'BISTORY OP PRBSBFT ILLNESS:.') 
S = [history^ of, present .ilLnea a, * i ' |Reat] . • . 
s<tetioo_Keader<Sf Beat, 'report hi*tpry of present illnesB item*; 
'HISTORY OP PRBgBVT' ILUTBSS : . ' ) : - 
s = [history, <?f, the, pre a enc, illn«s&, * ; ' |Reet] , i . 
fi«ction_he*der<5, Beat, 'report sp#clmfen item* , 'fiPBCIMSW' 1 :- 
S - [apeciwenlReat] r K 

% sentence consists of id number only or "." only.

integer U> u 
ieldentifier<LXr;l3 
integer (X) , 

isiditnCifieri [.] ) :- i. » aenbertCft consiats only OJE - 
istdesit if i^r (['.', ^<ao3>'^3 ) s- !. 

ieidentif icr(C'< ' ,p, • / S J > * paragraph marker sentence - update no. 

paragno tN) , 

t^tractftXl (paragno (^J ) , 
N&vno 1b tf + 1, " 
aaaert (paragno (WewrtO> 1 , 
retractail CsentfloM } , 
39S*rt(««ntCiO to)) . 

% skipsentence is true if sentence should be ignored.
% Skip sentences containing family info
ekipe entencQ t IX | _3 > j - 

f ounduord (X, family) , I , 
akipB enhance ( (X | _j J ; 

fo^rtdwotidCX, incyr^iice) , i . 
% Thift oceura it ft^ntenca contains 
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^ A aequonc:^ in akipB database nnd sentence aiao concain? findings. 

sklpeftrtttnceUXlRestl i Sftmlist , Birror) ; - 

ak±pfl(LX|63eqJ), * X is the beg. of subseq. in 3kip database 
prefix t (X|R«etl , [Xls^t^^j] ^ , % seftC^nce containa aubs*^. 
(subtype S^mlSst) ^ % sentence containa information to be fixtract€<* 
Srror o no? % don't try to segment 

Bxror ■ yesK % treat ««ntenc« error and try to eegmant; . 

* Jc ipaen tenee ( [_ | Re e 1 1 . S^rnl i a t . Er ror 1 r - 
B)cipaentencB<AeBt, SemLiet. Error) . 

% findingseg(+S, -Fseg, -Begseg) : partitions sentence



% S is the sentence; Begseg is the segment preceding the
% modifiers of the finding; Fseg is the segment of S starting

% with the leftmost modifier of the finding and consists of the

% remaining sentence.



€ Inding a eg {5 , Fseg , Begseg } ; • 

partition <G,Be$p«.ft,Hei!Epart) , 
(^egparb * [] , Begs^g - [ 1 ; 
aeetpart - [] , Faeg = [}^ B«9«eg - Sr 
rightiBttnod (Begpart , Be9^seg,Modeeg) ) , 
append (>]ode«$, R&stpArt , Fseg) . 
f indingseg (] 5 . 

*ctiOTifi«dingBeg<s, Fseg^Begsag^ : - 

partition^S.Bs^pNAft.Re^tpart) , 
(Bfl^part - n , Begaeg - I] ; 
R^Sttpart - (] , FBBg = [] , B«3ceg - 
xeverse (Bagpart , R«v»r«edBefore) , 

f indavjjo* t*nce < R« ve reedEe f or « , R« 9 1 ) , 
agp&nd [Siiha t ancepa rt , R«« t , Reve reedBef or* ) » 
r a verse ( Bul:^s toinoep^t:' t , Leftpart) , 
je-v*r£:*<*Re»t .Beg^feg) , 
Appenddief Cpart^Reatpart, Ps«g) ) . 
ftctionf indingBeg [] , _) I, 
fin5aubacar»c«( [] ,1) ) ! - U 
f indsubst-^rtceC [x|iiestD ,ReBt) ;- 

ff'jLbstance [X] . (] > . I . 
f indfluhacanceC rxlReatl] ,R»*t) 

f inds-iibstanco (Ke«tl,R&3t:| . 



t partition (+s J -Begpart, -RestpartJ ; partitions e«\tence 
4 & ia initial 

% partition(+S, -Begpart, -Restpart) : partitions sentence
% S is initial sentence; Begpart is part of sentence before the

% finding; Restpart is the rest of the sentence and starts with

% the finding. If there are 2 consecutive findings

% the 1st one is considered a modifier



partitiontn, II, CH !- 

partition ( [X|Re9t] r |Begpart] jReatpart] : - 

not flafindlng tx> J , i , partition (Re«t,Hegpart, Rest part) . 

partition Crx.Y I Seat]. [JC3 r fY iR^ilC) ) 
iafinding(x> , isfindine (V) , I . 

partition([X|RBst] , I] r lXjR«tl) 
iafinding(X> , I. 

% isfinding(+X) : is true if X is a word or phrase whose semantic class
% is a finding or subtype of finding.
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l8f inking (X) 

f ov4:idword<X,Sem^ , * semantic claea of word 

5viJbtyi>e5__, [Sam] J . t ie class a type of fl-indiiigr r«eonimend^ « technique 

4 sflTn«nttc cl«ss vfjiich are t/pce of zelavant infortwation 
subtype < f ind iflcf , Sam J : - 

In te rsftc t ( Bem , I *t c ach , ere a tal^ond , br ft^kljcnd . act i vate , 
inactivAte, suij^tituce, transcribe . estpr^aa, promote, 
&lgn«.L] \ ■ 

% there is only one type of technique class
subtype ( fe echni que r Sera j l * 

meniber lt.e<;])AiQu«, Sftm) . 
eubtype ( t iioe r ^^^^ ■ - 

intersect (5€in, [etatus^aatatus, chBtn^e. tmp^r, vstatua] } . 
£indlngitillBt(Seni> ?- 

intersBcrte^^, lattacb^ creatflboindr breakbond. activate, 
inactivate , subatitute , transcribe « eTcpresa, ^jromote* 
^igaall ) . 

% chkforpunct(+W,+Rest) : is true if there should be no space after word W
chkforpwnofc meonb^r [W, i " t ' j ' 1 ' # 

'{^ '}'. ' + '.■ = '.' r. '\']}, 1. 
% nothing left to write. 
ch>;forpujict<W, [] ) . 

% is true if there should be no space before word after current word

chkf orpiinct C [W | ) ; - 

% ispunct(+W) is true if W is a punctuation for sentence print out

t The following chsrftctera are not treated ae punct i - ' ^ S * R * 

ispunct IW) i- nwstnbtr (W, [^,^ '.'r'rS'/S . '>'. "'^'t'. 'J'. 

» ^) M', *^ ' . ' = '> M ' . '\' I r^9'] J ► 

% rightletitod^List, Pir«tpart,flto«3part) : h5*i5part begins with the first 
% word in Li«t which ia a Tnodlfier; Pirstpart ar« tha pxecedizi^ worda 
rightljKfetftodt L] . 13. (] ^ i. 

( X ia a modifier or finding; Beginning part ia empty 
rlghtlatmodS CXjRftj^t] , CI , [X|seBtJ ) : - 
found word fx, Sem, Target) , 

{cnoditfierfsem) r Sam = p, TArgret - t^^connj; subtype £5«itt] U ^ i. 
V X is not a cnodifiar or finding 
righti.atmod( [X|Rc5tl, [X jrirstpart} .Modpart) 

rightJ.itrttGd(ReBt/FtraCpart,P^p^irt) . 

% frame(Frame, Type, Value, Mods) : creates a list Frame whose 1st

% element is Type, 2nd element is Value, and 3rd is a list of

% modifier frames or is empty

% Case where modifier list is empty: Value should be atom except for
% certain types

framei CTypa, Vain*), Type r value, X> 

% Special cases where value of type should be a list
frame < iType, ChJe]] .Type. tH|R] .XJ - 

oklist (Typ«) r I • 

% Modifier list is merged with list consisting of Type and Value
t r ama t Pr <wfle , Type , V^lg^ , Mods ) : - 
atom ( value > , 

append < [Type , Value ] , Mcdt , Frain^ ) , i . 
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f ram* (Fram^, Type, IH|r] , Wojla) ?- 
i*_list (ft) , 

append IR, Moda, HewKods) , 
append ( [Type , K] , weKModSr Pramc) , !, 
i CompojiBnts of Frame 

frame C [Type, value |ModB3 .Type. Value, Mode) J. 

% Value of Type should not be a list; first element of value is real value

frJirteC (Tyi^rH. Rest) .Type, tB|Reetl . []) i . 

% Special cases where value of type should be a list

»£Tame ( [Type. [B [ RJ ] . Type , [H\RD, [] } t- ^repeated from rule iibove 

% okliat(TV¥>e) , !. 

% Value of Type should not be a list; first element of value is real value
fT-atn* [Prdffle , Type, CH | Re4f t ) , r»fotf*) : - 

mergemodB^Redt, Woda,NewModa) , 

append ( [Tys>e, H] , Mei^Oioda , Frame) , 

* mcrgernodlnf [-Fj 4pr*rne^, *Mc>da) ; Fxame i« a type -value -mod fTaire? Hoda 

% ie an additional set of inodlfiera for Frame ; irtergemodlnf adda Mod» 
% to Frame ^ resulting in J', 
tnarg^modirtf (£],[] I . 

mfirgemodinf (F, [rel , X |Rest] ,Modrel> :- 

mergemodinC [PI > Reat, Mcxirell ^ 

Append ( [rel , x) , Pi , P) , ! . 
- Tfiwrgreirodinf (p, [Fi,x|Modf io) .Hodrel) 3 • 

atomtPl) ,fn«rgemods [wodrel, Modfin, Wod) , 

•pp*nd ( [Fi , x] , Ptod, P3 , I , 
Tiuer^eittodinf fe, Eh|r] ,Modr»l) : - 

merg Bmjodtin f ( PL , K ^ Modre L ^ , 

cnergrnnodinf iF2 ^ Modral \ , 

append ( [Fl] ,F2rFJ , 

% addmodstof(+Args,+Mods,-NewArgs) is true if Args is a list of formats,

% Mods is a list of modifiers and NewArgs is a list of formats where Mods

% has been added to modifier list of that format

«<ldinodatof ([] ._,[]>:- | , % no more fgnnata 

addmodatof ( [Format! I Reet] ^Mode, [Pl|KewReflt3 ) j - 

niergeittodinf <F1 Format l^Mc4») . 4r merg« modi fi era iato lat f^rrftat 
addmodatof (Reat,Mods,Nev*Reat) , 1. ^««a modifier to remaining 

% oklist(+Type) : is true if Type can have a list as its value
okliBt [unitvalf , 

oklist [a^^h , 
OkLi»t (meaauxe) . 
oJcliat (prev_tiro»unit> , 
okliat (f uture:_exaittJ . 

% mergemods(+Mods1,+Mods2,-Mod) : Mods1 and Mods2 are a list of modifier lists
% Mod is the merged list; some elements of Mods1 and Mods2 may be

% empty
mergemodstC] ,K,M) J. 
mBrgemod£(Mr C] r^) - 

mor geffliada (Asoda 1 , cooda 2 , Mod ) t~ 
delete <M&dslr [1 .MX) , 
delete (rfedsir f] .M2) , 
append (Ml , M2 ^Mod} • 

% addmod(+Mod,+Modlist,-NewMod) : NewMod is formed by including
% Mod into Modlist

addmodni .Mod^K&d) . 
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afidirvod (Mod, (] , [Mod] ) :-• 2 . 
acSdrrit>d (Mod , Modi i st , Nawftoi j i . 

% modlist(+ListofMods, -Mods) : ListofMods is a list consisting of
% individual modifier frames, some of which may be empty
% Mods is formed as a list of non-empty modifiers
TnQdliBtU3. IJ> -* . 

% ignore a modifier which is an empty list
modli*tf lU |B1 .Mode) i- 

modlist (R.Nods^ , 2. 
iiiGdliatC[CH|SlJ 

ot&oni(H] r ) ^ 

fnodiiQt (R2,Rm0da) ^ 

addnod([H|Ri] .RiEpdB^rtods) , 
iRodliBt a [H|R1J |Ra5 ,Hoda) - 

is.liacCHI, J , % is firat sKm^nt la a list 

modli^^ (R3 ^ lUnoda ( « 

me^rgeinods ( [ H | Kl ] , i%nvod», Hi>da) . 

% bpframe: creates frame for arguments of bodyloc/region/position
bpfrawfttF, (] ^_,P^ [n ■'- 5, \ oaly 1 bodyloc 

bpfraTna(F, [] ,'ri'pw.Bpl,Bp2j * no conp relation tauc mora than i tjodyldc 
frBtfne<&pi,BpiTvpe,HpiVsl,BplWcidaS . %contc«fca of BpL frame 
£rftmft<Bp3.Bp2TVpe,Sp2V*i,Be>2Wods) , ^ciwitfintfl of B|^2 frame 
( {BpLType = T«gion; BplType = position} * 
ep2Tyj>e - bodyloc, \ «^left lung', ' arjia of iwig' 
tnet^fimoda (SpiHDdflrBp2Mc>d«, BpWodB^ , %nftw rfegloR moiSiliar 
framafNQwBpaModB.EpXrype.BpLVai.BpModei J liiQw Bpt frama w n«w irod 
fxama (F, BpSType.Bp^Val, CnBwftp2Mod*] ) k inatn trame is boOyloc 

HplType = bodylocr B^SType = bodyloc, ryP® = maiTi, %Bp2 is mwiA 

me rgemods ( BplMada , BpSNodB , fipHod^ ) , % nev bodylgc modi f i «r 

f raiM {NewBpSModa , Bpi'ryp«, 0^3 val . Bpnoda) r % ^ ^oint of shoulder • 

JCx^amaCFrBpZTyperBpSViiI , [UewBp2Moda] ] t inaln bp irame la shctgldea^ 

? 

msr0«mods f BplMioda , Bp3»ods , BpKc^d^ ] . 

ffc*m^(HfcwBplMQdB,BpaTyp»,Bp3Val,BpModfl) , % 'ahouLdar joint • 

frame <F,Bpi Type, Bp 1 Val, [MewBpiMOdsl > \ qiain \>p frame i£ shoulder 

bpframfl (F,RwLr_. Bpi.rBp2) :- % no ccnj ifelation but rt»re Chan 1 ijodyloc 
Sal - Crel, CDtij 1^3 , Bp2 I], 
me rgermode I [Bpi J , TspS ) , Oon j ar g a ) , 
fTanse(Prrai,Con5,0»ijaj(ige) * 

getr«l A t iort ( R , , P2 , F3 : - 
(F2 \- tJ , . . 

(Fi = [relrConJl|JlCi*ti3 . R = [rel^Ccmi] , 

{Con^l - ^ , ' r Conjl « c^r* Conjl ^ and), 
IConi - ' , ' : Conj - orj conj ■ ^ad) ; 

Beeti - IFi]}, 

(Conja = C?onj2 = ot? Conj2 s and); 

%aplice(tft.n«fttL,ReeC2] .F) ; 
ap^Tid([R«Keati^ReBtz] ,F) i 



6^ 



BNSOOCIO- <WO_00e3687AlTL> 



wo 



addetocal 

X =< SO, 

Hewx is X + 1, 

rBtr«ictftIl (ad{3s total tx} ) . 

«t s&e r t < addsto 1 ( tfevix \) , j . 
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Appendix £ 



SsavB j 




■ = 


AAAC; 


$saVc { 


• b' 




AAAG* ; 


$5av^ { 














'AACC ; 


^BBJve { 


' 






$eave { 


'f ' 


f = 


f AACr^ ; 


$*ave j 




^^ 








t = 






' i ' 




•AAGT' ; 








AATC r 


$55*^6 ( 




\ = 


'AATG' ■ 




'l^ 


[ • 


'AATT' ? 


5aav€ { 


'm' 




' ACAC ' r 




'n' 


\^ 


'ACAG' : 


$Bave { 


'o ' 


1 = 


' ACAT • ; 




*P' 


1 ^ 


'AGOC ' .- 


$Bave { 


'q' 


1 = 


' ACCO • J 




r 1 




• ACCT • ; 


$Bave { 




i = 




f 5" 


• e ' 




*ACGG' ; 




'u' 




^ACOT* ; 




• v 




tACTC ; 


^BBVe { 


' w 




AMG<; 


$save { 


»x' 


f a 


ACTP' 


$Aave { 


.y. 


> = 


AGAG' ; 


^flava { 


» a' 


f a 






'0* 


^1 


' AGCC ' f 


$3«ive { 


» 1 ' 


is 


•AOCQ' r 


Ssave ( 


•2"' 


f = 


•JW3CT' f 


5 save { 


I i ' 


1- ■ 


'MGC y 




.41 


\' 


• AGGG ' / 


5 save ( 


'5' 


\ ' 


'AOOT' J 




' 


^ . 


'ASTC' ; 


Sesve{ 






'AGTO' J 


5 save f 




^- 


' iW3TT ' ; 




•9' 


I s 


• ATAt* • ; 


Saave{ 


' ^ 


\ = 


'^TCC ; 




■1 ^ 




' ATCC ; 


SBave{ 


■ i' 


1 = 


'ArCC* ; 




' ^ * 


1" 


'ATCC ; 


S save { 


1 . L 


i = 


'ATCC ; 




■ II r 


1* 


' WTCC ^ J 








= ■ AfTG ' 




» ■» ^ 




'A'PCC ; 


Seave{ 






•ATCC* ; 








' CCCO ^ ; 








, CCCT ■ f 


$Bave { 














^CCOT' ; 


^aav^ { 


• * 1 


1 M 


CCTG' r 




' <' 




•ATOC' f 


I^B&V« { 


M • 




• ATCC • f 
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Seavej 






S&5ve { ' ' 


- 




$<6>a ve { ' ' . 




'COTG' 




, _ 


«CGTT' 


$gave( * { ■ 




'CTCT» 


1 , ■ 


1 A 


' ATCC 


$Bave( ■ . * 


1 s 


' ATCC* 


$save( ■ 1 * 


1 * 


■ CTTO' 


'« • 




• CXTT ' 


Ssave{ V 


1- 


'ATCC* 




^) 




$atavfi{ '^fi ' 




. GTcrr ' 


$Eave{ *\n* 


'] 


= ^A'rcc 


$Bavc{ ' < ' 


1 M 


'GTTT ' 


$<;ave( ' > ' 




'GTTT' 


$aave( ' 
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AppendlK p 

# f /uer/bin/perl 
*| Scan, pi ^ 5c«cv* blast output 
#Author: Michael Krauthammer
#Copyright (c) 1999, Columbia University

S i jipu t_f 1 1 E = "^cjenebsnk . r esu 1 1 ^ ; 
5outpuC_f ile= "match . txc " ; 

#open datastream for file which contains blast output

open (JMPUTj ' /ecora^e/psi blaBt/narklt/programB/markie . c^sult ■ > ; 

while ($line=^TWPUT»^{ 

if C$iine«-/\>gi\| I - •) V. C . -^i \r ( . *l /) ( 

$ tar get- $4,' 
Sgi =$1; 

$sama»tic claae«$3i 

) 

if ($linc— /Uersgth - <.*)/»{ 
$len9thi=$i> 
) 

If ($Uti«--/7(Sefttltlea \. <\a*\\//\\ 
$lezigth actuals^l 
} 

if ($iine= -/Query: (Vd*)/K 
1 

#prlnt if Sub J 1. Baffle blitie a fm&t.<tJi 2 or 3 line lon^ 

if (*lin«.-/9b7cti 1 /K 
ir f<$lfrngch_accual/$lengeni) ^ .9l{ 
print 

Stargat, | 'S Satart, " | , $Btaxt+$l€ngthl> " | ^Bfiniantic claae. " | " , $gl, "\a"; 
} 
} 
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Appendix ^ 

#Author: Michael Krauthammer, (c) 1999 Columbia University
cxpei^ t rwpirr. $ARCV (0) ) ? 

#read uncoded input text line by line (chop it)

while {$Ilne«<2N[>ur>) ( 
^ll«$AlI.$llxie; 

open flJJPUTIIr '/Btorsge/pai-blaat/Harklt/progifamB/iKiAiffeltll. result r 

open (otTTPUT, • >rcBUlt . txt ' ) j 

# first part: check matches, store positions

wKile ($line-<£NPt;TIt>] { 

l^name, Sftt:art,Send,$B€aiant.ic^claBB,Sgil=$linfl=-/t.*3M 

#divide by 4 (4 letter code)
$Btarc= ($8t;»rt-l]i /4; 

«f^ee aubstring 
if (^Bt^rt I- 

Slettet^at-atuhBCrt^all, SBtart-l,$eni5-$fitart+2) .«| "J 
} elee ( 

$lect«rc; - ' ■ . aubstr (dall. d. $end+2} . " | " ; 
l$letbcT.b«9tnnlng) r-gletters— / 
$let: t«7_eaidxi«subatr f $al L . $«nd, 2 \ • 

# ignore matches that are in the MIDDLE of sentences, allow plurals
Slcct«_beginning— .tr/ lA-21 / la- b1 / 
$leCter_Bnd=-Cr/ CA-Sl / [a- zl 

if ( n <ilett«r_bfigiiiiiiin9— ./ /) i St£ K ! C91fettet_end— / (a-s) /UN 
($lfttter_endr3=-/a /) ) I { 

#make sure only the first occurrence is stored at that position
if (5davQ(^atArt}««' { 

$BAve{ $• tart )=S end- ' | ' .$flefi(Leintlc_cla*fl . ' | • .$^1; 
> 

{ S end^key 1 = S 3a ve { $ key J = ^/^ ( . * ) \ | / ; 
if ($«na_key>$en(3l { 
if ($)teyc$atart} { 

$8av«{$0tart.) = *&ull' , 

} 

} 

) 

} 

} 
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4l£fecpnd part ; P'-int out inair>cad wp document 
sort (ftaa-vel ; 

for ($i-0;3i<len9th<$4Ll) 731+-*) i 

if (OSsAve($i}=='jiull') l$save{ei}— /./)) ( 
<$eiiii,Ssb<ttftl(itic„claas)=Ssave{$i}=-'/ ( - *1 \ | ( • \i /.* 
Mint OUTPUT •cphrs'-'.^sftmantLc^claeB,^"?'; 
$etore-aubBtr (Sail, Send ; 
print OUTPUT ^score; 
pxrint WTPUr «</phr>"; 
$i=$End-li 

) clac { 
$Btorc=Biii3Sti"t$all j Si , 1) : 
print OUTPUT Setor*,- 

) 
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